From e70138b61033081e3bfab3aaaec5997716cd7109 Mon Sep 17 00:00:00 2001 From: crusaderky Date: Wed, 13 Nov 2019 00:53:26 +0000 Subject: [PATCH 01/24] Recursive tokenization (#3515) * recursive tokenize * black * What's New * Also test Dataset * Also test IndexVariable * Cleanup * tokenize sparse objects --- doc/whats-new.rst | 2 +- xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 6 +++++- xarray/core/variable.py | 8 ++++++-- xarray/tests/test_dask.py | 26 ++++++++++++++++++++++++++ xarray/tests/test_sparse.py | 4 ++++ 6 files changed, 45 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96f0ba9a4a6..620617c127a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,7 +73,7 @@ New Features for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is - invoked. (:issue:`3378`, :pull:`3446`) + invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5e164f420c8..a192fe08cee 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -755,7 +755,9 @@ def reset_coords( return dataset def __dask_tokenize__(self): - return (type(self), self._variable, self._coords, self._name) + from dask.base import normalize_token + + return normalize_token((type(self), self._variable, self._coords, self._name)) def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dc5a315e72a..fe8abdc4b95 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -652,7 +652,11 @@ def load(self, **kwargs) -> "Dataset": return self def __dask_tokenize__(self): - return (type(self), self._variables, self._coord_names, self._attrs) + from dask.base import normalize_token + + return normalize_token( + (type(self), self._variables, self._coord_names, self._attrs) + ) def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 916df75b3e0..f842a4a9428 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -393,7 +393,9 @@ def compute(self, **kwargs): def __dask_tokenize__(self): # Use v.data, instead of v._data, in order to cope with the wrappers # around NetCDF and the like - return type(self), self._dims, self.data, self._attrs + from dask.base import normalize_token + + return normalize_token((type(self), self._dims, self.data, self._attrs)) def __dask_graph__(self): if isinstance(self._data, dask_array_type): @@ -1973,8 +1975,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): self._data = PandasIndexAdapter(self._data) def __dask_tokenize__(self): + from dask.base import normalize_token + # Don't waste time converting pd.Index to np.ndarray - return (type(self), self._dims, self._data.array, self._attrs) + return normalize_token((type(self), self._dims, self._data.array, self._attrs)) def load(self): # data is already loaded into memory for IndexVariable diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index fa8ae9991d7..43b788153bc 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1283,6 +1283,32 @@ def test_token_identical(obj, transform): ) +def test_recursive_token(): + """Test that 
tokenization is invoked recursively, and doesn't just rely on the + output of str() + """ + a = np.ones(10000) + b = np.ones(10000) + b[5000] = 2 + assert str(a) == str(b) + assert dask.base.tokenize(a) != dask.base.tokenize(b) + + # Test DataArray and Variable + da_a = DataArray(a) + da_b = DataArray(b) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + # Test Dataset + ds_a = da_a.to_dataset(name="x") + ds_b = da_b.to_dataset(name="x") + assert dask.base.tokenize(ds_a) != dask.base.tokenize(ds_b) + + # Test IndexVariable + da_a = DataArray(a, dims=["x"], coords={"x": a}) + da_b = DataArray(a, dims=["x"], coords={"x": b}) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + @requires_scipy_or_netCDF4 def test_normalize_token_with_backend(map_ds): with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index a31da162487..a02fef2faeb 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -856,6 +856,10 @@ def test_dask_token(): import dask s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + + # https://github.com/pydata/sparse/issues/300 + s.__dask_tokenize__ = lambda: dask.base.normalize_token(s.__dict__) + a = DataArray(s) t1 = dask.base.tokenize(a) t2 = dask.base.tokenize(a) From 94525bbaf417476dbe9a70b98801ae04aceaebf3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 13 Nov 2019 15:48:45 +0000 Subject: [PATCH 02/24] Deprecate allow_lazy (#3435) * Deprecate allow_lazy * add whats-new * test that reductions are lazy * minor whats-new fix. * fix merge wahts=new * fix bad merge. * remove tests that only work with nep-18 * Update doc/whats-new.rst Co-Authored-By: Mathias Hauser * Update xarray/core/variable.py Co-Authored-By: Mathias Hauser * fix whats-new * Fix test that assumed NEP-18 * fix tests. --- doc/whats-new.rst | 3 +++ xarray/core/common.py | 17 ++++------------- xarray/core/dataset.py | 2 +- xarray/core/groupby.py | 4 +--- xarray/core/variable.py | 13 ++++++++++++- xarray/tests/test_dask.py | 18 ++++++++++++++++-- xarray/tests/test_variable.py | 4 ++++ 7 files changed, 41 insertions(+), 20 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 620617c127a..212e465b368 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -88,6 +88,9 @@ Bug fixes By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. +- Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). + In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. + By `Deepak Cherian `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). 
By `Deepak Cherian `_ diff --git a/xarray/core/common.py b/xarray/core/common.py index d372115ea57..2afe4b4c3a7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce( - func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, axis, skipna=skipna, **kwargs) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore - return self.reduce(func, dim, axis, allow_lazy=True, **kwargs) + return self.reduce(func, dim, axis, **kwargs) return wrapped_func @@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, - dim, - skipna=skipna, - numeric_only=numeric_only, - allow_lazy=True, - **kwargs, + func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore - return self.reduce( - func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) return wrapped_func diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fe8abdc4b95..15a7209ab24 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4031,7 +4031,7 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, - allow_lazy: bool = False, + allow_lazy: bool = None, **kwargs: Any, ) -> "Dataset": """Reduce this dataset by applying `func` along some dimension(s). diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8ae65d9b9df..c73ee3cf7c5 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -585,9 +585,7 @@ def _first_or_last(self, op, skipna, keep_attrs): return self._obj if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) - return self.reduce( - op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True - ) + return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs) def first(self, skipna=None, keep_attrs=None): """Return the first element of each group along the group dimension diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f842a4a9428..cf97c997017 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,5 +1,6 @@ import functools import itertools +import warnings from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion @@ -1427,7 +1428,7 @@ def reduce( axis=None, keep_attrs=None, keepdims=False, - allow_lazy=False, + allow_lazy=None, **kwargs, ): """Reduce this array by applying `func` along some dimension(s). @@ -1468,7 +1469,17 @@ def reduce( if dim is not None: axis = self.get_axis_num(dim) + + if allow_lazy is not None: + warnings.warn( + "allow_lazy is deprecated and will be removed in version 0.16.0. 
It is now True by default.", + DeprecationWarning, + ) + else: + allow_lazy = True + input_data = self.data if allow_lazy else self.values + if axis is not None: data = func(input_data, axis=axis, **kwargs) else: diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 43b788153bc..4c1f317342f 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -12,6 +12,7 @@ import xarray as xr import xarray.ufuncs as xu from xarray import DataArray, Dataset, Variable +from xarray.core import duck_array_ops from xarray.testing import assert_chunks_equal from xarray.tests import mock @@ -217,6 +218,8 @@ def test_reduce(self): self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with raises_regex(NotImplementedError, "dask"): v.median() + with raise_if_dask_computes(): + v.reduce(duck_array_ops.mean) def test_missing_values(self): values = np.array([0, 1, np.nan, 3]) @@ -488,7 +491,17 @@ def test_groupby(self): v = self.lazy_array expected = u.groupby("x").mean(...) - actual = v.groupby("x").mean(...) + with raise_if_dask_computes(): + actual = v.groupby("x").mean(...) + self.assertLazyAndAllClose(expected, actual) + + def test_rolling(self): + u = self.eager_array + v = self.lazy_array + + expected = u.rolling(x=2).mean() + with raise_if_dask_computes(): + actual = v.rolling(x=2).mean() self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): @@ -500,7 +513,8 @@ def test_groupby_first(self): with raises_regex(NotImplementedError, "dask"): v.groupby("ab").first() expected = u.groupby("ab").first() - actual = v.groupby("ab").first(skipna=False) + with raise_if_dask_computes(): + actual = v.groupby("ab").first(skipna=False) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 528027ed149..d394919dbdd 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1477,6 +1477,10 @@ def test_reduce(self): with raises_regex(ValueError, "cannot supply both"): v.mean(dim="x", axis=0) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=True) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=False) def test_quantile(self): v = Variable(["x", "y"], self.d) From 7241aa12ae168f7af6efcf13f8012158a1331cb3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 13 Nov 2019 15:53:34 +0000 Subject: [PATCH 03/24] warn if dim is passed to rolling operations. (#3513) * warn if dim is passed to rolling operations. * Update doc/whats-new.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Update xarray/core/rolling.py Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/core/rolling.py | 9 +++++++++ xarray/tests/test_dataarray.py | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 212e465b368..f042f846c39 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -220,6 +220,9 @@ Bug fixes By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. +- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is + specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. + (:pull:`3362`). By `Deepak Cherian `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f4e571a8efe..a1864332f4d 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,4 +1,5 @@ import functools +import warnings from typing import Callable import numpy as np @@ -351,6 +352,14 @@ def _bottleneck_reduce(self, func, **kwargs): def _numpy_or_bottleneck_reduce( self, array_agg_func, bottleneck_move_func, **kwargs ): + if "dim" in kwargs: + warnings.warn( + f"Reductions will be applied along the rolling dimension '{self.dim}'. Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.", + DeprecationWarning, + stacklevel=3, + ) + del kwargs["dim"] + if bottleneck_move_func is not None and not isinstance( self.obj.data, dask_array_type ): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 42fae2c9dd4..7c6dc1825a1 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4188,6 +4188,9 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): ) assert_array_equal(actual.values, expected) + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") + # Test center rolling_obj = da.rolling(time=7, center=center) actual = getattr(rolling_obj, name)()["time"] @@ -4203,6 +4206,9 @@ def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): # dask version rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() + if name != "count": + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") # numpy version rolling_obj = da_dask.load().rolling( time=window, min_periods=min_periods, center=center From 40588dc38ddc2d573e3dc8c63b2e6533eb978656 Mon Sep 17 00:00:00 2001 From: Akihiro Matsukawa Date: Wed, 13 Nov 2019 10:55:32 -0500 Subject: [PATCH 04/24] Allow appending datetime & boolean variables to zarr stores (#3504) * Allow appending datetime and boolean data variables to zarr stores. * Run black and flake8 * Update error message --- doc/whats-new.rst | 2 ++ xarray/backends/api.py | 7 +++++-- xarray/tests/test_dataset.py | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f042f846c39..ea3b012cc98 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -94,6 +94,8 @@ Bug fixes - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ +- Allow appending datetime and bool data variables to zarr stores. + (:issue:`3480`). By `Akihiro Matsukawa `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d23594fc675..945b3937c43 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1234,6 +1234,8 @@ def _validate_datatypes_for_zarr_append(dataset): def check_dtype(var): if ( not np.issubdtype(var.dtype, np.number) + and not np.issubdtype(var.dtype, np.datetime64) + and not np.issubdtype(var.dtype, np.bool) and not coding.strings.is_unicode_dtype(var.dtype) and not var.dtype == object ): @@ -1241,8 +1243,9 @@ def check_dtype(var): raise ValueError( "Invalid dtype for data variable: {} " "dtype must be a subtype of number, " - "a fixed sized string, a fixed size " - "unicode string or an object".format(var) + "datetime, bool, a fixed sized string, " + "a fixed size unicode string or an " + "object".format(var) ) for k in dataset.data_vars.values(): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d001c43da94..67d3b3198dc 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -90,6 +90,14 @@ def create_append_test_data(seed=None): string_var = np.array(["ae", "bc", "df"], dtype=object) string_var_to_append = np.array(["asdf", "asdfg"], dtype=object) unicode_var = ["áó", "áó", "áó"] + datetime_var = np.array( + ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]" + ) + datetime_var_to_append = np.array( + ["2019-01-04", "2019-01-05"], dtype="datetime64[s]" + ) + bool_var = np.array([True, False, True], dtype=np.bool) + bool_var_to_append = np.array([False, True], dtype=np.bool) ds = xr.Dataset( data_vars={ @@ -102,6 +110,8 @@ def create_append_test_data(seed=None): "unicode_var": xr.DataArray( unicode_var, coords=[time1], dims=["time"] ).astype(np.unicode_), + "datetime_var": xr.DataArray(datetime_var, coords=[time1], dims=["time"]), + "bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]), } ) @@ -118,6 +128,10 @@ def create_append_test_data(seed=None): "unicode_var": xr.DataArray( unicode_var[:nt2], coords=[time2], dims=["time"] ).astype(np.unicode_), + "datetime_var": xr.DataArray( + datetime_var_to_append, coords=[time2], dims=["time"] + ), + "bool_var": xr.DataArray(bool_var_to_append, coords=[time2], dims=["time"]), } ) From 810345c4564a2bc15bf1b4c7ba4c4840238f1e82 Mon Sep 17 00:00:00 2001 From: Gina Date: Wed, 13 Nov 2019 14:18:14 -0600 Subject: [PATCH 05/24] FUNDING.yml (#3523) add NumFOCUS github sponsors button (recurring donations only) This feature launched today at GitHub Universe! Also add the custom link to point to the donation form for xarray. 
cc @shoyer --- .github/FUNDING.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000000..30c1e18f33c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: numfocus +custom: http://numfocus.org/donate-to-xarray From eece07932d5498a8abef6a8fbd30d00066931b18 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 13 Nov 2019 18:22:50 -0700 Subject: [PATCH 06/24] Harmonize `FillValue` and `missing_value` during encoding and decoding steps (#3502) * Replace `equivalent()` with `allclose_or_equiv()` * Ensure _FillValue & missing_value are cast to same dtype as data's * Use Numpy scalar during type casting * Update ValueError message * Formatting only * Update whats-new.rst --- doc/whats-new.rst | 2 ++ xarray/coding/variables.py | 14 ++++++++++---- xarray/tests/test_coding.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ea3b012cc98..f840557ab5d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,8 @@ New Features Bug fixes ~~~~~~~~~ +- Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,25 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." 
) if fv is not None: + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), From 4358762d7ccf0d81dfbbc37d9c0665d53fe9c426 Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 14 Nov 2019 02:24:07 +0100 Subject: [PATCH 07/24] Tests for module-level functions with units (#3493) * add tests for replication functions * add tests for `xarray.dot` * add tests for apply_ufunc * explicitly set the test ids to repr * add tests for align * cover a bit more of align * add tests for broadcast * black changed how tuple unpacking should look like * correct the xfail message for full_like tests * add tests for where * add tests for concat * add tests for combine_by_coords * fix a bug in convert_units * convert the align results to the same units * rename the combine_by_coords test * convert the units for expected in combine_by_coords * add tests for combine_nested * add tests for merge with datasets * only use three datasets for merging * add tests for merge with dataarrays * update whats-new.rst --- doc/whats-new.rst | 3 +- xarray/tests/test_units.py | 871 ++++++++++++++++++++++++++++++++++++- 2 files changed, 865 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f840557ab5d..a7687368884 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -118,7 +118,8 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Added integration tests against `pint `_. - (:pull:`3238`, :pull:`3447`, :pull:`3508`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) + by `Justus Magin `_. .. 
note:: diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index fd9e9b039ac..509a50d23ff 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -222,7 +222,9 @@ def convert_units(obj, to): if name != obj.name } - new_obj = xr.DataArray(name=name, data=data, coords=coords, attrs=obj.attrs) + new_obj = xr.DataArray( + name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims + ) elif isinstance(obj, unit_registry.Quantity): units = to.get(None) new_obj = obj.to(units) if units is not None else obj @@ -307,19 +309,689 @@ def __repr__(self): class function: - def __init__(self, name): - self.name = name - self.func = getattr(np, name) + def __init__(self, name_or_function, *args, **kwargs): + if callable(name_or_function): + self.name = name_or_function.__name__ + self.func = name_or_function + else: + self.name = name_or_function + self.func = getattr(np, name_or_function) + if self.func is None: + raise AttributeError( + f"module 'numpy' has no attribute named '{self.name}'" + ) + + self.args = args + self.kwargs = kwargs def __call__(self, *args, **kwargs): - return self.func(*args, **kwargs) + all_args = list(self.args) + list(args) + all_kwargs = {**self.kwargs, **kwargs} + + return self.func(*all_args, **all_kwargs) def __repr__(self): return f"function_{self.name}" +def test_apply_ufunc_dataarray(dtype): + func = function( + xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} + ) + + array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(20) * unit_registry.s + data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) + + expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) + result = func(data_array) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataarray(fill_value, variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + data_array1 = xr.DataArray( + data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") + ) + data_array2 = xr.DataArray( + data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", 
fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(data_array1, data_array2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(data_array1) + # FIXME: should the expected_b have the same units as data_array1 + # or data_array2? + expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(data_array1), + strip_units(convert_units(data_array2, units)), + **stripped_kwargs, + ) + ) + result_a, result_b = func(data_array1, data_array2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataset(fill_value, unit, variant, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, + coords={"x": x, "x_a": ("x", x_a1), "y": y1}, + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, + coords={"x": x, "x_a": ("x", x_a2), "y": y2}, + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(ds1, ds2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(ds1) + # FIXME: should the expected_b have the same units as ds1 or ds2? 
+ expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs + ) + ) + result_a, result_b = func(ds1, ds2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataarray(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + a = xr.DataArray(data=array1, dims="x") + b = xr.DataArray(data=array2, dims="y") + + expected_a, expected_b = tuple( + attach_units(elem, extract_units(a)) + for elem in xr.broadcast(strip_units(a), strip_units(b)) + ) + result_a, result_b = xr.broadcast(a, b) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataset(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + + (expected,) = tuple( + attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) + ) + (result,) = xr.broadcast(ds) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`combine_by_coords` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_by_coords(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + other_array1 = np.ones_like(array1) * data_unit + other_array2 = np.ones_like(array2) * data_unit + other_x = np.arange(1, 4) * 10 * dim_unit + other_y = np.arange(2, 4) * dim_unit + other_z = np.arange(3, 6) * coord_unit + + ds = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + other = xr.Dataset( + data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, + coords={"x": other_x, "y": other_y, "z": ("x", other_z)}, + ) + + if error is not None: + with pytest.raises(error): + xr.combine_by_coords([ds, other]) + + return + + units = extract_units(ds) + expected = attach_units( + xr.combine_by_coords( + [strip_units(ds), strip_units(convert_units(other, units))] + ), + units, + ) + result = xr.combine_by_coords([ds, other]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + 
pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_nested(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + ds4 = xr.Dataset( + data_vars={ + "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit), + "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(6, 9) * dim_unit, + "y": np.arange(6, 8) * dim_unit, + "z": ("x", np.arange(6, 9) * coord_unit), + }, + ) + + func = function(xr.combine_nested, concat_dim=["x", "y"]) + if error is not None: + with pytest.raises(error): + func([[ds1, ds2], [ds3, ds4]]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func( + [ + [strip_units(ds1), convert_and_strip(ds2)], + [convert_and_strip(ds3), convert_and_strip(ds4)], + ] + ), + units, + ) + result = func([[ds1, ds2], [ds3, ds4]]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x") + arr2 = 
xr.DataArray(data=array2, coords={"x": x2}, dims="x") + + if error is not None: + with pytest.raises(error): + xr.concat([arr1, arr2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1) + ) + result = xr.concat([arr1, arr2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1}) + ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2}) + + if error is not None: + with pytest.raises(error): + xr.concat([ds1, ds2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1) + ) + result = xr.concat([ds1, ds2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + y = np.arange(3) * original_unit + z = np.arange(4) * original_unit + u = np.linspace(10, 20, 2) * original_unit + v = np.linspace(10, 20, 3) * original_unit + w = np.linspace(10, 20, 4) * original_unit + + arr1 = xr.DataArray( + name="a", + data=array1, + coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + dims=("x", "y"), + ) + arr2 = xr.DataArray( + name="b", + data=array2, + coords={ + "x": np.arange(2, 4) * dim_unit, + "z": z, + "u": ("x", np.linspace(20, 30, 2) * coord_unit), + "w": ("z", w), + }, + dims=("x", 
"z"), + ) + arr3 = xr.DataArray( + name="c", + data=array3, + coords={ + "y": np.arange(3, 6) * dim_unit, + "z": np.arange(4, 8) * dim_unit, + "v": ("y", np.linspace(10, 20, 3) * coord_unit), + "w": ("z", np.linspace(10, 20, 4) * coord_unit), + }, + dims=("y", "z"), + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([arr1, arr2, arr3]) + + return + + units = {name: original_unit for name in list("abcuvwxyz")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) + expected = attach_units( + func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), + units, + ) + result = func([arr1, arr2, arr3]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(11, 14) * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([ds1, ds2, ds3]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + ) + result = func([ds1, ds2, ds3]) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication(func, dtype): +def test_replication_dataarray(func, dtype): array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -330,8 +1002,33 @@ def test_replication(func, dtype): assert_equal_with_units(expected, result) +@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) +def test_replication_dataset(func, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * 
unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + numpy_func = getattr(np, func.__name__) + expected = ds.copy( + data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} + ) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( - reason="np.full_like on Variable strips the unit and pint does not allow mixed args" + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) ) @pytest.mark.parametrize( "unit,error", @@ -344,8 +1041,9 @@ def test_replication(func, dtype): pytest.param(unit_registry.ms, None, id="compatible_unit"), pytest.param(unit_registry.s, None, id="identical_unit"), ), + ids=repr, ) -def test_replication_full_like(unit, error, dtype): +def test_replication_full_like_dataarray(unit, error, dtype): array = np.linspace(0, 5, 10) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -360,6 +1058,163 @@ def test_replication_full_like(unit, error, dtype): assert_equal_with_units(expected, result) +@pytest.mark.xfail( + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.ms, None, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="identical_unit"), + ), + ids=repr, +) +def test_replication_full_like_dataset(unit, error, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + fill_value = -1 * unit + if error is not None: + with pytest.raises(error): + xr.full_like(ds, fill_value=fill_value) + + return + + expected = ds.copy( + data={ + name: np.full_like(array, fill_value=fill_value) + for name, array in ds.data_vars.items() + } + ) + result = xr.full_like(ds, fill_value=fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataarray(fill_value, unit, error, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + x = xr.DataArray(data=array, dims="x") + cond = x < 5 * unit_registry.m + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + 
if error is not None: + with pytest.raises(error): + xr.where(cond, x, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x) + ) + result = xr.where(cond, x, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataset(fill_value, unit, error, dtype): + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m + x = np.arange(10) * unit_registry.s + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + cond = ds.x < 5 * unit_registry.s + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, ds, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds) + ) + result = xr.where(cond, ds, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="pint does not implement `np.einsum`") +def test_dot_dataarray(dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) + * unit_registry.m + / unit_registry.s + ) + array2 = ( + np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s + ) + + arr1 = xr.DataArray(data=array1, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, dims=("y", "z")) + + expected = array1.dot(array2) + result = xr.dot(arr1, arr2) + + assert_equal_with_units(expected, result) + + class TestDataArray: @pytest.mark.filterwarnings("error:::pint[.*]") @pytest.mark.parametrize( From 8b240376fd91352a80b068af606850e8d57d1090 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 13 Nov 2019 22:56:59 -0500 Subject: [PATCH 08/24] add Variable._replace (#3528) * add Variable._replace * assertions * whatsew * whatsnew --- doc/whats-new.rst | 3 +++ xarray/core/variable.py | 19 +++++++++++++++++-- xarray/tests/test_variable.py | 9 +++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a7687368884..b8fb1f8f58e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -138,6 +138,9 @@ Internal Changes - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ +- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`) + By `Maximilian Roos `_ + .. 
_whats-new.0.14.0: v0.14.0 (14 Oct 2019) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cf97c997017..e630dc4b457 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,3 +1,4 @@ +import copy import functools import itertools import warnings @@ -24,10 +25,11 @@ from .pycompat import dask_array_type, integer_types from .utils import ( OrderedSet, + _default, decode_numpy_dict_values, either_dict_or_kwargs, - infix_dims, ensure_us_time_resolution, + infix_dims, ) try: @@ -887,7 +889,20 @@ def copy(self, deep=True, data=None): # note: # dims is already an immutable tuple # attributes and encoding will be copied when the new Array is created - return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) + return self._replace(data=data) + + def _replace( + self, dims=_default, data=_default, attrs=_default, encoding=_default + ) -> "Variable": + if dims is _default: + dims = copy.copy(self._dims) + if data is _default: + data = copy.copy(self.data) + if attrs is _default: + attrs = copy.copy(self._attrs) + if encoding is _default: + encoding = copy.copy(self._encoding) + return type(self)(dims, data, attrs, encoding, fastpath=True) def __copy__(self): return self.copy(deep=False) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index d394919dbdd..d92a68729b5 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -542,6 +542,15 @@ def test_copy_index_with_data_errors(self): with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + def test_replace(self): + var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) + result = var._replace() + assert_identical(result, var) + + new_data = np.arange(4).reshape(2, 2) + result = var._replace(data=new_data) + assert_array_equal(result.data, new_data) + def test_real_and_imag(self): v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"}) expected_re = self.cls("x", np.arange(3), {"foo": "bar"}) From c0ef2f616e87e9f924425bcd373ac265f14203cb Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Thu, 14 Nov 2019 20:56:17 +0900 Subject: [PATCH 09/24] Fix set_index when an existing dimension becomes a level (#3520) * Added a test * Fix set_index * lint * black / mypy * Use _replace method * whats new --- doc/whats-new.rst | 2 ++ xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 12 ++++++++++-- xarray/tests/test_dataarray.py | 10 ++++++++++ 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b8fb1f8f58e..abd94779435 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix a bug in `set_index` in case that an existing dimension becomes a level variable of MultiIndex. (:pull:`3520`) + By `Keisuke Fujii `_. - Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) By `Anderson Banihirwe `_. 
- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a192fe08cee..55e73478260 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -48,7 +48,7 @@ assert_coordinate_consistent, remap_label_indexers, ) -from .dataset import Dataset, merge_indexes, split_indexes +from .dataset import Dataset, split_indexes from .formatting import format_item from .indexes import Indexes, default_indexes from .merge import PANDAS_TYPES @@ -1601,10 +1601,10 @@ def set_index( -------- DataArray.reset_index """ - _check_inplace(inplace) - indexes = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") - coords, _ = merge_indexes(indexes, self._coords, set(), append=append) - return self._replace(coords=coords) + ds = self._to_temp_dataset().set_index( + indexes, append=append, inplace=inplace, **indexes_kwargs + ) + return self._from_temp_dataset(ds) def reset_index( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 15a7209ab24..de713b830f2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -204,6 +204,7 @@ def merge_indexes( """ vars_to_replace: Dict[Hashable, Variable] = {} vars_to_remove: List[Hashable] = [] + dims_to_replace: Dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): @@ -244,7 +245,7 @@ def merge_indexes( if not len(names) and len(var_names) == 1: idx = pd.Index(variables[var_names[0]].values) - else: + else: # MultiIndex for n in var_names: try: var = variables[n] @@ -256,15 +257,22 @@ def merge_indexes( levels.append(cat.categories) idx = pd.MultiIndex(levels, codes, names=names) + for n in names: + dims_to_replace[n] = dim vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) new_variables = {k: v for k, v in variables.items() if k not in vars_to_remove} new_variables.update(vars_to_replace) + + # update dimensions if necessary GH: 3512 + for k, v in new_variables.items(): + if any(d in dims_to_replace for d in v.dims): + new_dims = [dims_to_replace.get(d, d) for d in v.dims] + new_variables[k] = v._replace(dims=new_dims) new_coord_names = coord_names | set(vars_to_replace) new_coord_names -= set(vars_to_remove) - return new_variables, new_coord_names diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 7c6dc1825a1..4c3553c867e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1182,6 +1182,16 @@ def test_selection_multiindex_remove_unused(self): expected = expected.set_index(xy=["x", "y"]).unstack() assert_identical(expected, actual) + def test_selection_multiindex_from_level(self): + # GH: 3512 + da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) + db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) + data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"]) + assert data.dims == ("xy",) + actual = data.sel(y="a") + expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop("y") + assert_equal(actual, expected) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") From 7b4a286f59bc7d60d4e4d03be65562ff63f9b111 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 14 Nov 2019 11:56:49 -0500 Subject: [PATCH 10/24] units & deprecation merge (#3530) --- xarray/tests/test_units.py | 6 +++--- 1 file changed, 
3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 509a50d23ff..0be6f8af464 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1969,7 +1969,7 @@ def test_broadcast_equals(self, unit, dtype): dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, axis=1, ), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("reset_coords", names="x2"), method("copy"), pytest.param( @@ -4045,7 +4045,7 @@ def test_reindex_like(self, unit, error, dtype): marks=pytest.mark.xfail(reason="strips units"), ), pytest.param( - method("apply", np.fabs), + method("map", np.fabs), marks=pytest.mark.xfail(reason="fabs strips units"), ), ), @@ -4220,7 +4220,7 @@ def test_grouped_operations(self, func, dtype): method("rename_dims", x="offset_x"), method("swap_dims", {"x": "x2"}), method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("drop_dims", "z"), method("set_coords", names="c"), method("reset_coords", names="x2"), From ee9da17ef04035cf318b6f1a4bb413f3d10ae614 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Nov 2019 14:53:16 +0000 Subject: [PATCH 11/24] interpolate_na: Add max_gap support. (#3302) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * interpolate_na: Add maxgap support. * Add docs. * Add requires_bottleneck to test. * Review comments. * Update xarray/core/dataarray.py Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Update xarray/core/dataset.py Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * maxgap → max_gap * update whats-new * update computation.rst * Better support uniformly spaced coordinates. Split legnths, interp test * Raise error for max_gap and irregularly spaced coordinates + test * rework. * Use pandas checks for index duplication and monotonicity. * Progress + add datetime. * nicer error message * A few fstrings. * finish up timedelta max_gap. * fix whats-new * small fixes. * fix dan's test. * remove redundant test. * nicer error message. * Add xfailed cftime tests * better error checking and tests. * typing. * update docstrings * scipy intersphinx * fix tests * add bottleneck testing decorator. --- doc/computation.rst | 3 + doc/conf.py | 11 +-- doc/whats-new.rst | 4 ++ xarray/core/dataarray.py | 58 +++++++++++----- xarray/core/dataset.py | 60 +++++++++++----- xarray/core/missing.py | 110 +++++++++++++++++++++++++---- xarray/tests/test_missing.py | 130 ++++++++++++++++++++++++++++++++++- 7 files changed, 322 insertions(+), 54 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index 663c546be20..240a1e5704b 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -95,6 +95,9 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification of which values to use as the index in the interpolation. +xarray also provides the ``max_gap`` keyword argument to limit the interpolation to +data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` +for more. Aggregation =========== diff --git a/doc/conf.py b/doc/conf.py index 7c1557a1e66..0e04f8ccde8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -340,9 +340,10 @@ # Example configuration for intersphinx: refer to the Python standard library. 
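# The mappings below are normalized to drop trailing slashes, and a "scipy"
# entry is added so that the :py:func:`scipy.interpolate.interp1d`-style
# references in the new interpolate_na docstrings resolve via intersphinx.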
intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "iris": ("http://scitools.org.uk/iris/docs/latest/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "numba": ("https://numba.pydata.org/numba-doc/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "matplotlib": ("https://matplotlib.org", None), } diff --git a/doc/whats-new.rst b/doc/whats-new.rst index abd94779435..053f785bc05 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,10 @@ Breaking changes New Features ~~~~~~~~~~~~ + +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian `_. - :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 55e73478260..7ce775b49cd 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2018,44 +2018,69 @@ def fillna(self, value: Any) -> "DataArray": def interpolate_na( self, - dim=None, + dim: Hashable = None, method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline', 'akima'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their - respective``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. 
If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - DataArray + interpolated: DataArray + Filled in DataArray. See also -------- @@ -2070,6 +2095,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index de713b830f2..913842c4eba 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3908,42 +3908,65 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- - dim : Hashable + dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes.
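A minimal usage sketch of the ``max_gap`` behaviour documented above (hypothetical data; gap lengths follow the coordinate-difference definition given in the docstring):

import numpy as np
import xarray as xr

da = xr.DataArray(
    [0.0, 1.0, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0],
    dims="x",
    coords={"x": np.arange(8)},
)
# The one-NaN gap has length 3 - 1 = 2; the three-NaN gap has length 7 - 3 = 4.
# With max_gap=2, only the first gap is interpolated:
filled = da.interpolate_na(dim="x", max_gap=2)
# filled -> [0, 1, 2, 3, nan, nan, nan, 7]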
- use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. - kwargs : any - parameters passed verbatim to the underlying interplation function + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - Dataset + interpolated: Dataset + Filled in Dataset. See also -------- @@ -3959,6 +3982,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) return new diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 77dde66484e..117fcaf8f81 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,18 +1,46 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, Sequence +from numbers import Number +from typing import Any, Callable, Dict, Hashable, Sequence, Union import numpy as np import pandas as pd from . import utils -from .common import _contains_datetime_like_objects +from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables +def _get_nan_block_lengths(obj, dim: Hashable, index: Variable): + """ + Return an object where each NaN element in 'obj' is replaced by the + length of the gap the element is in.
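+
+    For example, with index = [0, 1, 2, 3] and obj = [1, nan, nan, 4], the
+    two NaN elements are replaced by 3 (the coordinate difference between the
+    valid points bounding the gap) and the valid elements by 0.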
+ """ + + # make variable so that we get broadcasting for free + index = Variable([dim], index) + + # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 + arange = ones_like(obj) * index + valid = obj.notnull() + valid_arange = arange.where(valid) + cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) + + nan_block_lengths = ( + cumulative_nans.diff(dim=dim, label="upper") + .reindex({dim: obj[dim]}) + .where(valid) + .bfill(dim=dim) + .where(~valid, 0) + .fillna(index[-1] - valid_arange.max()) + ) + + return nan_block_lengths + + class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ @@ -178,7 +206,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True): +def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): """get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -195,23 +223,33 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = arr.coords[use_coordinate] if index.ndim != 1: raise ValueError( - "Coordinates used for interpolation must be 1D, " - "%s is %dD." % (use_coordinate, index.ndim) + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) except (TypeError, ValueError): # pandas raises a TypeError - # xarray/nuppy raise a ValueError + # xarray/numpy raise a ValueError raise TypeError( - "Index must be castable to float64 to support" - "interpolation, got: %s" % type(index) + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." ) - # check index sorting now so we can skip it later - if not (np.diff(index) > 0).all(): - raise ValueError("Index must be monotonicly increasing") + else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) @@ -220,7 +258,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( - self, dim=None, use_coordinate=True, method="linear", limit=None, **kwargs + self, + dim: Hashable = None, + use_coordinate: Union[bool, str] = True, + method: str = "linear", + limit: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + **kwargs, ): """Interpolate values according to different methods. """ @@ -230,6 +274,40 @@ def interp_na( if limit is not None: valids = _get_valid_fill_mask(self, dim, limit) + if max_gap is not None: + max_type = type(max_gap).__name__ + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if ( + dim in self.indexes + and isinstance(self.indexes[dim], pd.DatetimeIndex) + and use_coordinate + ): + if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): + raise TypeError( + f"Underlying index is DatetimeIndex. 
Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" + ) + + if isinstance(max_gap, str): + try: + max_gap = pd.to_timedelta(max_gap) + except ValueError: + raise ValueError( + f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" + ) + + if isinstance(max_gap, pd.Timedelta): + max_gap = np.timedelta64(max_gap.value, "ns") + + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + + if not use_coordinate: + if not isinstance(max_gap, (Number, np.number)): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." + ) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) @@ -253,6 +331,14 @@ def interp_na( if limit is not None: arr = arr.where(valids) + if max_gap is not None: + if dim not in self.coords: + raise NotImplementedError( + "max_gap not implemented for unlabeled coordinates yet." + ) + nan_block_lengths = _get_nan_block_lengths(self, dim, index) + arr = arr.where(nan_block_lengths <= max_gap) + return arr diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index cfce5d6f645..0b410383a34 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,7 +5,13 @@ import pytest import xarray as xr -from xarray.core.missing import NumpyInterpolator, ScipyInterpolator, SplineInterpolator +from xarray.core.missing import ( + NumpyInterpolator, + ScipyInterpolator, + SplineInterpolator, + get_clean_interp_index, + _get_nan_block_lengths, +) from xarray.core.pycompat import dask_array_type from xarray.tests import ( assert_array_equal, @@ -153,7 +159,7 @@ def test_interpolate_pd_compat_polynomial(): def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) - with raises_regex(ValueError, "Index must be monotonicly increasing"): + with raises_regex(ValueError, "Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") @@ -169,12 +175,19 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim="x", method="foo") +def test_interpolate_duplicate_values_raises(): + data = np.random.randn(2, 3) + da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) + with raises_regex(ValueError, "Index 'x' has duplicate values"): + da.interpolate_na(dim="x", method="foo") + + def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) - with raises_regex(TypeError, "Index must be castable to float64"): + with raises_regex(TypeError, "Index 'z' must be castable to float64"): das.interpolate_na(dim="z") @@ -439,3 +452,114 @@ def test_ffill_dataset(ds): @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") + + +@requires_bottleneck +@pytest.mark.parametrize( + "y, lengths", + [ + [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], + ], +) +def test_interpolate_na_nan_block_lengths(y, lengths): + arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] + da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = 
_get_nan_block_lengths(da, dim="y", index=index) + expected = da.copy(data=lengths * 2) + assert_equal(actual, expected) + + +@pytest.fixture +def da_time(): + return xr.DataArray( + [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], + dims=["t"], + ) + + +def test_interpolate_na_max_gap_errors(da_time): + with raises_regex( + NotImplementedError, "max_gap not implemented for unlabeled coordinates" + ): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "max_gap must be a scalar."): + da_time.interpolate_na("t", max_gap=(1,)) + + da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) + with raises_regex(TypeError, "Underlying index is"): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(TypeError, "Expected integer or floating point"): + da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) + + with raises_regex(ValueError, "Could not convert 'huh' to timedelta64"): + da_time.interpolate_na("t", max_gap="huh") + + +@requires_bottleneck +@pytest.mark.parametrize( + "time_range_func", + [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], +) +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) +@pytest.mark.parametrize( + "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] +) +def test_interpolate_na_max_gap_time_specifier( + da_time, max_gap, transform, time_range_func +): + da_time["t"] = time_range_func("2001-01-01", freq="H", periods=11) + expected = transform( + da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) + ) + actual = transform(da_time).interpolate_na("t", max_gap=max_gap) + assert_equal(actual, expected) + + +@requires_bottleneck +@pytest.mark.parametrize( + "coords", + [ + pytest.param(None, marks=pytest.mark.xfail()), + {"x": np.arange(4), "y": np.arange(11)}, + ], +) +def test_interpolate_na_2d(coords): + da = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + + actual = da.interpolate_na("y", max_gap=2) + expected_y = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + ] + ) + assert_equal(actual, expected_y) + + actual = da.interpolate_na("x", max_gap=3) + expected_x = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + assert_equal(actual, expected_x) From aa876cfd6b3b97ee5028d089ec741d057e3ed688 Mon Sep 17 00:00:00 2001 From: crusaderky Date: Fri, 15 Nov 2019 17:43:53 +0000 Subject: [PATCH 12/24] Leave empty slot when not using accessors --- xarray/core/dataarray.py | 5 ++--- xarray/core/dataset.py | 6 ++---- xarray/core/extensions.py | 13 +++++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7ce775b49cd..b27a61d530b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -249,14 +249,14 @@ class 
DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] # noqa + _cache: Dict[str, Any] _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] _variable: Variable __slots__ = ( - "_accessors", + "_cache", "_coords", "_file_obj", "_indexes", @@ -373,7 +373,6 @@ def __init__( assert isinstance(coords, dict) self._coords = coords self._name = name - self._accessors = None # TODO(shoyer): document this argument, once it becomes part of the # public interface. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 913842c4eba..ea310dd164b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -419,8 +419,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates used for label based indexing. """ - _accessors: Optional[Dict[str, Any]] _attrs: Optional[Dict[Hashable, Any]] + _cache: Dict[str, Any] _coord_names: Set[Hashable] _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] @@ -428,8 +428,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _variables: Dict[Hashable, Variable] __slots__ = ( - "_accessors", "_attrs", + "_cache", "_coord_names", "_dims", "_encoding", @@ -535,7 +535,6 @@ def __init__( data_vars, coords, compat=compat ) - self._accessors = None self._attrs = dict(attrs) if attrs is not None else None self._file_obj = None self._encoding = None @@ -870,7 +869,6 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._accessors = None return obj @classmethod diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index f473eaa497d..79abbccea39 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -20,10 +20,15 @@ def __get__(self, obj, cls): # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + # Use the same dict as @pandas.util.cache_readonly. + # It must be explicitly declared in obj.__slots__. try: - return obj._accessors[self._name] - except TypeError: - obj._accessors = {} + cache = obj._cache + except AttributeError: + cache = obj._cache = {} + + try: + return cache[self._name] except KeyError: pass @@ -35,7 +40,7 @@ def __get__(self, obj, cls): # something else (GH933): raise RuntimeError("error initializing %r accessor." % self._name) - obj._accessors[self._name] = accessor_obj + cache[self._name] = accessor_obj return accessor_obj From 68b004fe5033f4a991d152190864ee1180845806 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 15 Nov 2019 20:49:29 +0100 Subject: [PATCH 13/24] ensure rename does not change index type (#3532) * ensure rename does not change index type * test requires cftime * test orig.indexes[time].name is conserved * use index.rename() --- doc/whats-new.rst | 4 +++ xarray/core/dataset.py | 2 +- xarray/tests/test_dataset.py | 49 ++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 053f785bc05..3c3bf127a3f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -83,6 +83,10 @@ New Features Bug fixes ~~~~~~~~~ +- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when + calling :py:meth:`Dataset.rename` (also :py:meth:`Dataset.rename_dims` + and :py:meth:`xr.Dataset.rename_vars`). By `Mathias Hauser `_ + (:issue:`3522`). - Fix a bug in `set_index` in case that an existing dimension becomes a level variable of MultiIndex. 
(:pull:`3520`) By `Keisuke Fujii `_. - Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ea310dd164b..3a83b477681 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2665,7 +2665,7 @@ def _rename_indexes(self, name_dict, dims_set): verify_integrity=False, ) else: - index = pd.Index(v, name=new_name) + index = v.rename(new_name) indexes[new_name] = index return indexes diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 67d3b3198dc..780843f2e61 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from pandas.core.indexes.datetimes import DatetimeIndex import xarray as xr from xarray import ( @@ -22,6 +23,7 @@ open_dataset, set_options, ) +from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE @@ -2458,6 +2460,53 @@ def test_rename_vars(self): with pytest.raises(ValueError): original.rename_vars(names_dict_bad) + @requires_cftime + def test_rename_does_not_change_CFTimeIndex_type(self): + # make sure CFTimeIndex is not converted to DatetimeIndex #3522 + + time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], CFTimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], CFTimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + def test_rename_does_not_change_DatetimeIndex_type(self): + # make sure DatetimeIndex is conserved on rename + + time = pd.date_range(start="2000", periods=6, freq="2MS") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], DatetimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], DatetimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42}) expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")}) From 52d48450f6291716a90f4f7e93e15847942e0da0 Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 15 Nov 2019 20:58:01 +0100 Subject: [PATCH 14/24] Add DatasetGroupBy.quantile (#3527) * move the implementation of DataArrayGroupBy.quantile to GroupBy * add tests for DatasetGroupBy * update whats-new.rst * move the item in whats-new.rst into New Features * don't drop scalar quantile coords --- doc/whats-new.rst | 2 + xarray/core/groupby.py | 107
+++++++++++++------------- xarray/tests/test_groupby.py | 143 +++++++++++++++++++++++++++++++---- 3 files changed, 184 insertions(+), 68 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3c3bf127a3f..c835fbeff45 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -80,6 +80,8 @@ New Features invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c73ee3cf7c5..38ecc04534a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -557,6 +557,59 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result is a + scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. The other + dimensions are the dimensions that remain after the + reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -737,60 +790,6 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. 
- * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. - - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.map( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 581affa3471..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) - expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) 
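    # dim=... reduces each group over all of its dimensions, so only the
    # grouped "y" coordinate (plus the scalar quantile coordinate) remains.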
- expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,111 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) + assert_identical(expected, actual) + + +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) 
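+    # The scalar quantile coordinate is now kept on the result: GroupBy.quantile
+    # no longer calls drop_vars("quantile") for scalar q.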
+ expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) + expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13), "quantile": 0}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + ) assert_identical(expected, actual) From 56c16e4bf45a3771fd9acba76d802c0199c14519 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Sat, 16 Nov 2019 23:36:43 +0900 Subject: [PATCH 15/24] Added fill_value for unstack (#3541) * Added fill_value for unstack * remove sparse option and fix unintended changes * a bug fix * using assert_equal * assert_equals -> assert_equal --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 7 +++++-- xarray/core/dataset.py | 13 +++++++++---- xarray/tests/test_dataset.py | 17 +++++++++++++++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c835fbeff45..6bf495713fe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added the ``fill_value`` option to :py:meth:`~xarray.DataArray.unstack` and + :py:meth:`~xarray.Dataset.unstack` (:issue:`3518`). + By `Keisuke Fujii `_. - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b27a61d530b..23342fc5e0d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1726,7 +1726,9 @@ def stack( return self._from_temp_dataset(ds) def unstack( - self, dim: Union[Hashable, Sequence[Hashable], None] = None + self, + dim: Union[Hashable, Sequence[Hashable], None] = None, + fill_value: Any = dtypes.NA, ) -> "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1739,6 +1741,7 @@ def unstack( dim : hashable or sequence of hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. 
By default, np.nan Returns ------- @@ -1770,7 +1773,7 @@ def unstack( -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim) + ds = self._to_temp_dataset().unstack(dim, fill_value) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3a83b477681..371e0d6bf26 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3333,7 +3333,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3342,7 +3342,7 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False) + obj = self.reindex({dim: full_idx}, copy=False, fill_value=fill_value) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3368,7 +3368,11 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": variables, coord_names=coord_names, indexes=indexes ) - def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": + def unstack( + self, + dim: Union[Hashable, Iterable[Hashable]] = None, + fill_value: Any = dtypes.NA, + ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -3380,6 +3384,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": dim : Hashable or iterable of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -3417,7 +3422,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim) + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 780843f2e61..be40ce7c6e8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2794,6 +2794,23 @@ def test_unstack_errors(self): with raises_regex(ValueError, "do not have a MultiIndex"): ds.unstack("x") + def test_unstack_fill_value(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", fill_value=-1) + expected = ds.unstack("index").fillna(-1).astype(np.int) + assert actual["var"].dtype == np.int + assert_equal(actual, expected) + + actual = ds["var"].unstack("index", fill_value=-1) + expected = ds["var"].unstack("index").fillna(-1).astype(np.int) + assert actual.equals(expected) + def test_stack_unstack_fast(self): ds = Dataset( { From 9755e3f3e986c3ab89797ce86201b64b7f702184 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 16 Nov 2019 15:36:49 -0500 Subject: [PATCH 16/24] small simplification of rename from #3532 (#3539) --- xarray/core/dataset.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 371e0d6bf26..5de254614ff 100644 --- 
a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2657,13 +2657,7 @@ def _rename_indexes(self, name_dict, dims_set): continue if isinstance(v, pd.MultiIndex): new_names = [name_dict.get(k, k) for k in v.names] - index = pd.MultiIndex( - v.levels, - v.labels, - v.sortorder, - names=new_names, - verify_integrity=False, - ) + index = v.rename(names=new_names) else: index = v.rename(new_name) indexes[new_name] = index From 980a1d26969b603d4be61033791781abd702d02a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 16 Nov 2019 13:39:33 -0700 Subject: [PATCH 17/24] tweak whats-new. (#3540) * tweak whats-new. * update. --- doc/conf.py | 1 + doc/whats-new.rst | 96 ++++++++++++++++++++++++----------------------- 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 0e04f8ccde8..f1199d53fb7 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -346,4 +346,5 @@ "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), "numba": ("https://numba.pydata.org/numba-doc/latest", None), "matplotlib": ("https://matplotlib.org", None), + "dask": ("https://docs.dask.org/en/latest", None), } diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6bf495713fe..cb274bcaee8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,33 +21,35 @@ v0.14.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ -- Broken compatibility with cftime < 1.0.3. - By `Deepak Cherian `_. +- Broken compatibility with ``cftime < 1.0.3`` . By `Deepak Cherian `_. - .. note:: + .. warning:: cftime version 1.0.4 is broken (`cftime/126 `_); please use version 1.0.4.2 instead. -- All leftover support for dates from non-standard calendars through netcdftime, the +- All leftover support for dates from non-standard calendars through ``netcdftime``, the module included in versions of netCDF4 prior to 1.4 that eventually became the - cftime package, has been removed in favor of relying solely on the standalone - cftime package (:pull:`3450`). + `cftime `_ package, has been removed in favor of relying solely on + the standalone ``cftime`` package (:pull:`3450`). By `Spencer Clark `_. New Features ~~~~~~~~~~~~ -- Added the ``fill_value`` option to :py:meth:`~xarray.DataArray.unstack` and - :py:meth:`~xarray.Dataset.unstack` (:issue:`3518`). +- Added the ``max_gap`` kwarg to :py:meth:`DataArray.interpolate_na` and + :py:meth:`Dataset.interpolate_na`. This controls the maximum size of the data +- Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and + :py:meth:`Dataset.unstack` (:issue:`3518`, :pull:`3541`). By `Keisuke Fujii `_. - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. - :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for - dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible + dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` & + :py:meth:`DataArray.drop` methods remain as a backward compatible option for dropping either labels or variables, but using the more specific methods is encouraged. (:pull:`3475`) By `Maximilian Roos `_ @@ -58,71 +60,71 @@ New Features methods is encouraged. 
(:pull:`3459`) By `Maximilian Roos `_ -- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) +- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (``...``) to represent all 'other' dimensions. For example, to move one dimension to the front, - use `.transpose('x', ...)`. (:pull:`3421`) + use ``.transpose('x', ...)``. (:pull:`3421`) By `Maximilian Roos `_ -- Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use - `...` directly. As before, you can use this to instruct a `groupby` operation - to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest - using `...`. (:pull:`3418`) +- Changed ``xr.ALL_DIMS`` to equal python's ``Ellipsis`` (``...``), and changed internal usages to use + ``...`` directly. As before, you can use this to instruct a ``groupby`` operation + to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest + using ``...``. (:pull:`3418`) By `Maximilian Roos `_ -- :py:func:`~xarray.dot`, and :py:func:`~xarray.DataArray.dot` now support the - `dims=...` option to sum over the union of dimensions of all input arrays +- :py:func:`xarray.dot`, and :py:meth:`DataArray.dot` now support the + ``dims=...`` option to sum over the union of dimensions of all input arrays (:issue:`3423`) by `Mathias Hauser `_. - Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve - representation of objects in jupyter. By default this feature is turned off - for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. + representation of objects in Jupyter. By default this feature is turned off + for now. Enable it with ``xarray.set_options(display_style="html")``. (:pull:`3425`) by `Benoit Bovy `_ and `Julia Signell `_. - Implement `dask deterministic hashing `_ for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole - xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is + xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map` is invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. -- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. +- Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`. (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ - Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when - calling :py:meth:`Dataset.rename` (also :py:meth:`Dataset.rename_dims` - and :py:meth:`xr.Dataset.rename_vars`). By `Mathias Hauser `_ - (:issue:`3522`). -- Fix a bug in `set_index` in case that an existing dimension becomes a level variable of MultiIndex. (:pull:`3520`) - By `Keisuke Fujii `_. -- Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) + calling :py:meth:`Dataset.rename`, :py:meth:`Dataset.rename_dims` and :py:meth:`Dataset.rename_vars`. + By `Mathias Hauser `_. (:issue:`3522`). +- Fix a bug in :py:meth:`DataArray.set_index` in case that an existing dimension becomes a level + variable of MultiIndex. (:pull:`3520`). By `Keisuke Fujii `_. +- Harmonize ``_FillValue``, ``missing_value`` during encoding and decoding steps. (:pull:`3502`) By `Anderson Banihirwe `_. 
- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. -- Use dask names to compare dask objects prior to comparing values after computation. +- Make alignment and concatenation significantly more efficient by using dask names to compare dask + objects prior to comparing values after computation. This change makes it more convenient to carry + around large non-dimensional coordinate variables backed by dask arrays. Existing workarounds involving + ``reset_coords(drop=True)`` should now be unnecessary in most cases. (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). By `Deepak Cherian `_. -- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. - By `Anderson Banihirwe `_. +- Add support for cftime>=1.0.4. By `Anderson Banihirwe `_. - Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. By `Deepak Cherian `_. -- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and - :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. +- Fix :py:meth:`GroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. (:issue:`3480`). By `Akihiro Matsukawa `_. Documentation ~~~~~~~~~~~~~ -- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) - by `Mickaël Lalande `_. +- Fix leap year condition in `monthly means example `_. + By `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and - :py:meth:`Dataset.resample` and explicitly state that a + :py:meth:`Dataset.resample` — explicitly state that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin `_. -- Update the terminology page to address multidimensional coordinates. (:pull:`3410`) +- Update the :ref:`terminology` page to address multidimensional coordinates. (:pull:`3410`) By `Jon Thielen `_. - Fix the documentation of :py:meth:`Dataset.integrate` and :py:meth:`DataArray.integrate` and add an example to @@ -186,15 +188,15 @@ Breaking changes (:issue:`3222`, :issue:`3293`, :issue:`3340`, :issue:`3346`, :issue:`3358`). By `Guido Imperiale `_. -- Dropped the `drop=False` optional parameter from :meth:`Variable.isel`. +- Dropped the ``drop=False`` optional parameter from :py:meth:`Variable.isel`. It was unused and doesn't make sense for a Variable. (:pull:`3375`). By `Guido Imperiale `_. -- Remove internal usage of `collections.OrderedDict`. After dropping support for - Python <=3.5, most uses of `OrderedDict` in Xarray were no longer necessary. We - have removed the internal use of the `OrderedDict` in favor of Python's builtin - `dict` object which is now ordered itself. This change will be most obvious when - interacting with the `attrs` property on the Dataset and DataArray objects. +- Remove internal usage of :py:class:`collections.OrderedDict`. After dropping support for + Python <=3.5, most uses of ``OrderedDict`` in Xarray were no longer necessary. We + have removed the internal use of the ``OrderedDict`` in favor of Python's builtin + ``dict`` object which is now ordered itself. This change will be most obvious when + interacting with the ``attrs`` property on Dataset and DataArray objects. (:issue:`3380`, :pull:`3389`). 
By `Joe Hamman `_. New functions/methods @@ -220,15 +222,15 @@ Enhancements - Added a ``GroupBy.dims`` property that mirrors the dimensions of each group (:issue:`3344`). -- Speed up :meth:`Dataset.isel` up to 33% and :meth:`DataArray.isel` up to 25% for small +- Speed up :py:meth:`Dataset.isel` up to 33% and :py:meth:`DataArray.isel` up to 25% for small arrays (:issue:`2799`, :pull:`3375`). By `Guido Imperiale `_. Bug fixes ~~~~~~~~~ - Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been - reinstated for :class:`DataArray` and :class:`Dataset` objects only. Internal xarray - objects remain unaddressable by weakref in order to save memory + reinstated for :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects only. + Internal xarray objects remain unaddressable by weakref in order to save memory (:issue:`3317`). By `Guido Imperiale `_. - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays @@ -238,7 +240,7 @@ Bug fixes - The default behaviour of reducing across all dimensions for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`). - Use `xarray.ALL_DIMS` if you need to replicate previous behaviour. + Use ``xarray.ALL_DIMS`` if you need to replicate previous behaviour. Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). @@ -325,7 +327,7 @@ New functions/methods - xarray can now wrap around any `NEP18 `_ compliant - numpy-like library (important: read notes about NUMPY_EXPERIMENTAL_ARRAY_FUNCTION in + numpy-like library (important: read notes about ``NUMPY_EXPERIMENTAL_ARRAY_FUNCTION`` in the above link). Added explicit test coverage for `sparse `_. (:issue:`3117`, :issue:`3202`). This requires `sparse>=0.8.0`. 
   By `Nezar Abdennur `_

From 45fd0e63f43cf313b022a33aeec7f0f982e1908b Mon Sep 17 00:00:00 2001
From: crusaderky 
Date: Tue, 19 Nov 2019 14:06:45 +0000
Subject: [PATCH 18/24] Numpy 1.18 support (#3537)

* Closes #3409
* Unpin versions
* Rewrite unit test for clarity about its real scope
* mean() on dask
* Trivial
* duck_array_ops should never receive xarray.Variable

---
 ci/azure/install.yml                |  2 +-
 ci/requirements/py36.yml            |  2 +-
 ci/requirements/py37.yml            |  2 +-
 doc/whats-new.rst                   |  7 +++-
 xarray/core/dataset.py              |  4 ++-
 xarray/core/duck_array_ops.py       | 28 ++++++++++++++--
 xarray/tests/test_dataset.py        |  4 ++-
 xarray/tests/test_duck_array_ops.py | 50 +++++++++++++++++++----------
 8 files changed, 74 insertions(+), 25 deletions(-)

diff --git a/ci/azure/install.yml b/ci/azure/install.yml
index fee886ba804..baa69bcc8d5 100644
--- a/ci/azure/install.yml
+++ b/ci/azure/install.yml
@@ -16,9 +16,9 @@ steps:
       --pre \
       --upgrade \
       matplotlib \
+      numpy \
      pandas \
       scipy
-    # numpy \  # FIXME https://github.com/pydata/xarray/issues/3409
     pip install \
       --no-deps \
       --upgrade \
diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
index 10fe69253e8..820160b19cc 100644
--- a/ci/requirements/py36.yml
+++ b/ci/requirements/py36.yml
@@ -25,7 +25,7 @@ dependencies:
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy<1.18  # FIXME https://github.com/pydata/xarray/issues/3409
+  - numpy
   - pandas
   - pint
   - pip
diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
index 827c664a222..4a7aaf7d32b 100644
--- a/ci/requirements/py37.yml
+++ b/ci/requirements/py37.yml
@@ -25,7 +25,7 @@ dependencies:
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy<1.18  # FIXME https://github.com/pydata/xarray/issues/3409
+  - numpy
   - pandas
   - pint
   - pip
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index cb274bcaee8..0c929b5b711 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -115,6 +115,12 @@ Bug fixes
   (:issue:`3402`). By `Deepak Cherian `_
 - Allow appending datetime and bool data variables to zarr stores.
   (:issue:`3480`). By `Akihiro Matsukawa `_.
+- Add support for numpy >=1.18; fix ``mean()`` on datetime64 arrays on the dask backend
+  (:issue:`3409`, :pull:`3537`). By `Guido Imperiale `_.
+- Add support for pandas >=0.26 (:issue:`3440`).
+  By `Deepak Cherian `_.
+- Add support for pseudonetcdf >=3.1 (:pull:`3485`).
+  By `Barron Henderson `_.

 Documentation
 ~~~~~~~~~~~~~
@@ -133,7 +139,6 @@ Documentation

 Internal Changes
 ~~~~~~~~~~~~~~~~
-
 - Added integration tests against `pint `_.
   (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
   by `Justus Magin `_.
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 5de254614ff..c631a4c11ea 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5316,7 +5316,9 @@ def _integrate_one(self, coord, datetime_unit=None):
             datetime_unit, _ = np.datetime_data(coord_var.dtype)
         elif datetime_unit is None:
             datetime_unit = "s"  # Default to seconds for cftime objects
-        coord_var = datetime_to_numeric(coord_var, datetime_unit=datetime_unit)
+        coord_var = coord_var._replace(
+            data=datetime_to_numeric(coord_var.data, datetime_unit=datetime_unit)
+        )

         variables = {}
         coord_names = set()
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 71e79335c3d..cf616acb485 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -351,6 +351,26 @@ def f(values, axis=None, skipna=None, **kwargs):
 _mean = _create_nan_agg_method("mean")


+def _datetime_nanmin(array):
+    """nanmin() function for datetime64.
+
+    Caveats that this function deals with:
+
+    - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
+    - numpy nanmin() doesn't work on datetime64 (all versions at the moment of writing)
+    - dask min() does not work on datetime64 (all versions at the moment of writing)
+    """
+    assert array.dtype.kind in "mM"
+    dtype = array.dtype
+    # (NaT).astype(float) does not produce NaN...
+    array = where(pandas_isnull(array), np.nan, array.astype(float))
+    array = min(array, skipna=True)
+    if isinstance(array, float):
+        array = np.array(array)
+    # ...but (NaN).astype("M8") does produce NaT
+    return array.astype(dtype)
+
+
 def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     """Convert an array containing datetime-like data to an array of floats.

@@ -370,7 +390,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     """
     # TODO: make this function dask-compatible?
     if offset is None:
-        offset = array.min()
+        if array.dtype.kind in "Mm":
+            offset = _datetime_nanmin(array)
+        else:
+            offset = min(array)
     array = array - offset

     if not hasattr(array, "dtype"):  # scalar is converted to 0d-array
@@ -401,7 +424,8 @@ def mean(array, axis=None, skipna=None, **kwargs):

     array = asarray(array)
     if array.dtype.kind in "Mm":
-        offset = min(array)
+        offset = _datetime_nanmin(array)
+
+        # xarray always uses np.datetime64[ns] for np.datetime64 data
         dtype = "timedelta64[ns]"
         return (
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index be40ce7c6e8..de074da541f 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -5874,7 +5874,9 @@ def test_trapz_datetime(dask, which_datetime):

     actual = da.integrate("time", datetime_unit="D")
     expected_data = np.trapz(
-        da, duck_array_ops.datetime_to_numeric(da["time"], datetime_unit="D"), axis=0
+        da.data,
+        duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
+        axis=0,
     )
     expected = xr.DataArray(
         expected_data,
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index f678af2fec5..aee7bbd6b11 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -274,23 +274,39 @@ def assert_dask_array(da, dask):


 @arm_xfail
-@pytest.mark.parametrize("dask", [False, True])
-def test_datetime_reduce(dask):
-    time = np.array(pd.date_range("15/12/1999", periods=11))
-    time[8:11] = np.nan
-    da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time})
-
-    if dask and has_dask:
-        chunks = {"time": 5}
-        da = da.chunk(chunks)
-
-    actual = da["time"].mean()
-    assert not pd.isnull(actual)
-    actual = da["time"].mean(skipna=False)
-    assert pd.isnull(actual)
-
-    # test for a 0d array
-    assert da["time"][0].mean() == da["time"][:1].mean()
+@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
+def test_datetime_mean(dask):
+    da = DataArray(
+        np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"),
+        dims=["time"],
+    )
+    if dask:
+        # Trigger use case where a chunk is full of NaT
+        da = da.chunk({"time": 3})
+
+    expect = DataArray(np.array("2010-01-02", dtype="M8"))
+    expect_nat = DataArray(np.array("NaT", dtype="M8"))
+
+    actual = da.mean()
+    if dask:
+        assert actual.chunks is not None
+    assert_equal(actual, expect)
+
+    actual = da.mean(skipna=False)
+    if dask:
+        assert actual.chunks is not None
+    assert_equal(actual, expect_nat)
+
+    # tests for 1d array full of NaT
+    assert_equal(da[[1]].mean(), expect_nat)
+
assert_equal(da[[1]].mean(skipna=False), expect_nat) + + # tests for a 0d array + assert_equal(da[0].mean(), da[0]) + assert_equal(da[0].mean(skipna=False), da[0]) + assert_equal(da[1].mean(), expect_nat) + assert_equal(da[1].mean(skipna=False), expect_nat) @requires_cftime From dc559ea4a0b043908b5539641c2d22ab9a051b19 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 19 Nov 2019 16:32:25 +0100 Subject: [PATCH 19/24] Silence sphinx warnings (#3516) * silence sphinx warnings * silence more sphinx warnings * fix some references * fix the docstrings of Dataset reduce methods * mark the orphaned files as such * silence some nit-picky warnings * convert all references to xray to double backtick quoted text * silence more warnings in whats-new.rst * require a whatsnew format of Name * rename the second cf conventions link * silence more sphinx warnings * get interpolate_na docstrings in sync with master * fix sphinx warnings for interpolate_na docstrings * update references to old documentation sections * cut the link to h5netcdf.File * use the correct reference types for numpy * update the reference to atop (dask renamed it to blockwise) * rewrite numpy docstrings * guard against non-str documentation * pass name to skip_signature * remove links to pandas.Panel * convince sphinx to create pages astype and groupby().quantile * more warnings --- doc/README.rst | 2 + doc/api-hidden.rst | 5 + doc/combining.rst | 6 +- doc/computation.rst | 6 +- doc/dask.rst | 2 +- doc/data-structures.rst | 6 +- doc/pandas.rst | 2 +- doc/whats-new.rst | 240 +++++++++++++++++------------------ xarray/backends/api.py | 8 +- xarray/coding/cftimeindex.py | 2 +- xarray/core/alignment.py | 2 +- xarray/core/combine.py | 2 + xarray/core/common.py | 26 ++-- xarray/core/computation.py | 2 +- xarray/core/concat.py | 1 + xarray/core/dataarray.py | 12 +- xarray/core/dataset.py | 15 ++- xarray/core/groupby.py | 7 +- xarray/plot/plot.py | 2 +- xarray/ufuncs.py | 40 ++++++ 20 files changed, 229 insertions(+), 159 deletions(-) diff --git a/doc/README.rst b/doc/README.rst index af7bc96092c..0579f85d85f 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -1,3 +1,5 @@ +:orphan: + xarray ------ diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 8f82b30a442..027c732697f 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -2,6 +2,8 @@ .. This extra page is a work around for sphinx not having any support for .. hiding an autosummary table. +:orphan: + .. currentmodule:: xarray .. autosummary:: @@ -30,9 +32,11 @@ core.groupby.DatasetGroupBy.first core.groupby.DatasetGroupBy.last core.groupby.DatasetGroupBy.fillna + core.groupby.DatasetGroupBy.quantile core.groupby.DatasetGroupBy.where Dataset.argsort + Dataset.astype Dataset.clip Dataset.conj Dataset.conjugate @@ -71,6 +75,7 @@ core.groupby.DataArrayGroupBy.first core.groupby.DataArrayGroupBy.last core.groupby.DataArrayGroupBy.fillna + core.groupby.DataArrayGroupBy.quantile core.groupby.DataArrayGroupBy.where DataArray.argsort diff --git a/doc/combining.rst b/doc/combining.rst index 4593d410d23..05b7f2efc50 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -255,11 +255,11 @@ Combining along multiple dimensions ``combine_nested``. For combining many objects along multiple dimensions xarray provides -:py:func:`~xarray.combine_nested`` and :py:func:`~xarray.combine_by_coords`. These +:py:func:`~xarray.combine_nested` and :py:func:`~xarray.combine_by_coords`. 
These functions use a combination of ``concat`` and ``merge`` across different variables to combine many objects into one. -:py:func:`~xarray.combine_nested`` requires specifying the order in which the +:py:func:`~xarray.combine_nested` requires specifying the order in which the objects should be combined, while :py:func:`~xarray.combine_by_coords` attempts to infer this ordering automatically from the coordinates in the data. @@ -310,4 +310,4 @@ These functions can be used by :py:func:`~xarray.open_mfdataset` to open many files as one dataset. The particular function used is specified by setting the argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for situations where your data is split across many files in multiple locations, -which have some known relationship between one another. \ No newline at end of file +which have some known relationship between one another. diff --git a/doc/computation.rst b/doc/computation.rst index 240a1e5704b..1ac30f55ee7 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -325,8 +325,8 @@ Broadcasting by dimension name ``DataArray`` objects are automatically align themselves ("broadcasting" in the numpy parlance) by dimension name instead of axis order. With xarray, you do not need to transpose arrays or insert dimensions of length 1 to get array -operations to work, as commonly done in numpy with :py:func:`np.reshape` or -:py:const:`np.newaxis`. +operations to work, as commonly done in numpy with :py:func:`numpy.reshape` or +:py:data:`numpy.newaxis`. This is best illustrated by a few examples. Consider two one-dimensional arrays with different sizes aligned along different dimensions: @@ -566,7 +566,7 @@ to set ``axis=-1``. As an example, here is how we would wrap Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like -:func:`numpy.broadcast_arrays` and :func:`numpy.vectorize`. For high performance +:py:func:`numpy.broadcast_arrays` and :py:class:`numpy.vectorize`. For high performance needs, consider using Numba's :doc:`vectorize and guvectorize `. In addition to wrapping functions, ``apply_ufunc`` can automatically parallelize diff --git a/doc/dask.rst b/doc/dask.rst index 11f378aa376..ed99ffaa896 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -285,7 +285,7 @@ automate `embarrassingly parallel `__ "map" type operations where a function written for processing NumPy arrays should be repeatedly applied to xarray objects containing Dask arrays. It works similarly to -:py:func:`dask.array.map_blocks` and :py:func:`dask.array.atop`, but without +:py:func:`dask.array.map_blocks` and :py:func:`dask.array.blockwise`, but without requiring an intermediate layer of abstraction. For the best performance when using Dask's multi-threaded scheduler, wrap a diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 93cdc7e9765..d5c8fa961f7 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -45,7 +45,7 @@ Creating a DataArray The :py:class:`~xarray.DataArray` constructor takes: - ``data``: a multi-dimensional array of values (e.g., a numpy ndarray, - :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or :py:class:`~pandas.Panel`) + :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or ``pandas.Panel``) - ``coords``: a list or dictionary of coordinates. If a list, it should be a list of tuples where the first element is the dimension name and the second element is the corresponding coordinate array_like object. 
@@ -125,7 +125,7 @@ As a dictionary with coords across multiple dimensions: If you create a ``DataArray`` by supplying a pandas :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or -:py:class:`~pandas.Panel`, any non-specified arguments in the +``pandas.Panel``, any non-specified arguments in the ``DataArray`` constructor will be filled in from the pandas object: .. ipython:: python @@ -301,7 +301,7 @@ names, and its data is aligned to any existing dimensions. You can also create an dataset from: -- A :py:class:`pandas.DataFrame` or :py:class:`pandas.Panel` along its columns and items +- A :py:class:`pandas.DataFrame` or ``pandas.Panel`` along its columns and items respectively, by passing it into the :py:class:`~xarray.Dataset` directly - A :py:class:`pandas.DataFrame` with :py:meth:`Dataset.from_dataframe `, which will additionally handle MultiIndexes See :ref:`pandas` diff --git a/doc/pandas.rst b/doc/pandas.rst index 4f3088b4c34..72abf6609f6 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -112,7 +112,7 @@ automatically stacking them into a ``MultiIndex``. :py:meth:`DataArray.to_pandas() ` is a shortcut that lets you convert a DataArray directly into a pandas object with the same dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`, -2D to :py:class:`~pandas.DataFrame` and 3D to :py:class:`~pandas.Panel`): +2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``): .. ipython:: python diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0c929b5b711..105d661b5f7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -112,9 +112,9 @@ Bug fixes In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. By `Deepak Cherian `_. - Fix :py:meth:`GroupBy.reduce` when reducing over multiple dimensions. - (:issue:`3402`). By `Deepak Cherian `_ + (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. - (:issue:`3480`). By `Akihiro Matsukawa `_. + (:issue:`3480`). By `Akihiro Matsukawa `_. - Add support for numpy >=1.18 (); bugfix mean() on datetime64 arrays on dask backend (:issue:`3409`, :pull:`3537`). By `Guido Imperiale `_. - Add support for pandas >=0.26 (:issue:`3440`). @@ -239,9 +239,9 @@ Bug fixes (:issue:`3317`). By `Guido Imperiale `_. - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays - (:issue:`3334`). By `Tom Nicholas `_. + (:issue:`3334`). By `Tom Nicholas `_. - Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but - not others (:issue:`508`). By `Deepak Cherian `_. + not others (:issue:`508`). By `Deepak Cherian `_. - The default behaviour of reducing across all dimensions for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`). @@ -249,26 +249,26 @@ Bug fixes Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). - By `Deepak Cherian `_. + By `Deepak Cherian `_. - Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. - (:pull:`3362`). By `Deepak Cherian `_. + (:pull:`3362`). By `Deepak Cherian `_. 
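To illustrate the ``xarray.ALL_DIMS`` note above, reducing each group over all of its
dimensions now spells as a literal ``...`` (a hedged sketch; the coordinate names and
values are made up for the example)::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(6).reshape(2, 3),
        dims=["x", "y"],
        coords={"letter": ("x", ["a", "b"])},
    )

    # Reduce each group over all of its dimensions, as xr.ALL_DIMS used to do
    print(da.groupby("letter").mean(...).values)  # [1. 4.]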
Documentation ~~~~~~~~~~~~~ - Created a glossary of important xarray terms (:issue:`2410`, :pull:`3352`). - By `Gregory Gundersen `_. + By `Gregory Gundersen `_. - Created a "How do I..." section (:ref:`howdoi`) for solutions to common questions. (:pull:`3357`). - By `Deepak Cherian `_. + By `Deepak Cherian `_. - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` (pull:`3331`, pull:`3331`). By `Justus Magin `_. - Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, :py:meth:`full_like`, :py:meth:`zeros_like`, :py:meth:`ones_like`, :py:meth:`Dataset.pipe`, - :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna` (pull:`3328`). + :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna` (:pull:`3328`). By `Anderson Banihirwe `_. - Fixed documentation to clean up an unwanted file created in ``ipython`` example - (:pull:`3353`). By `Gregory Gundersen `_. + (:pull:`3353`). By `Gregory Gundersen `_. .. _whats-new.0.13.0: @@ -322,7 +322,7 @@ Breaking changes - :py:meth:`DataArray.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous positional arguments were deprecated) - Reindexing with variables of a different dimension now raise an error (previously deprecated) -- :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of +- ``xarray.broadcast_array`` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) - :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of :py:meth:`Variable.set_dims`) @@ -358,7 +358,7 @@ New functions/methods - Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`. By `Deepak Cherian `_ and `David Mertz - `_. + `_. - Dataset plotting API for visualizing dependencies between two DataArrays! Currently only :py:meth:`Dataset.plot.scatter` is implemented. @@ -404,21 +404,21 @@ Enhancements By `Gerardo Rivera `_. - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used - with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`. + with ``engine="h5netcdf"``. It is passed to ``h5netcdf.File``. By `Ulrich Herter `_. -- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index +- ``xarray.Dataset.drop`` now supports keyword arguments; dropping index labels by using both ``dim`` and ``labels`` or using a :py:class:`~xarray.core.coordinates.DataArrayCoordinates` object are deprecated (:issue:`2910`). - By `Gregory Gundersen `_. + By `Gregory Gundersen `_. - Added examples of :py:meth:`Dataset.set_index` and :py:meth:`DataArray.set_index`, as well are more specific error messages when the user passes invalid arguments (:issue:`3176`). By `Gregory Gundersen `_. -- :py:func:`filter_by_attrs` now filters the coordinates as well as the variables. +- :py:meth:`Dataset.filter_by_attrs` now filters the coordinates as well as the variables. By `Spencer Jones `_. Bug fixes @@ -445,7 +445,7 @@ Bug fixes By `Hasan Ahmad `_. - Fixed bug in ``combine_by_coords()`` causing a `ValueError` if the input had an unused dimension with coordinates which were not monotonic (:issue:`3150`). - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Fixed crash when applying ``distributed.Client.compute()`` to a DataArray (:issue:`3171`). By `Guido Imperiale `_. - Better error message when using groupby on an empty DataArray (:issue:`3037`). 
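As a rough illustration of the ``combine_by_coords`` behaviour referenced in the entries
above (the datasets here are invented; the point is that ordering is inferred from the
coordinate values, not from the argument order)::

    import xarray as xr

    x1 = xr.Dataset({"t": ("x", [10.0, 11.0])}, coords={"x": [0, 1]})
    x2 = xr.Dataset({"t": ("x", [12.0, 13.0])}, coords={"x": [2, 3]})

    # Order is inferred from the coordinate values, not the argument order
    combined = xr.combine_by_coords([x2, x1])
    print(combined.x.values)  # [0 1 2 3]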
@@ -469,7 +469,7 @@ Documentation - Fixed documentation to clean up unwanted files created in ``ipython`` examples (:issue:`3227`). - By `Gregory Gundersen `_. + By `Gregory Gundersen `_. .. _whats-new.0.12.3: @@ -539,7 +539,7 @@ New functions/methods To avoid FutureWarnings switch to using ``combine_nested`` or ``combine_by_coords``, (or set the ``combine`` argument in ``open_mfdataset``). (:issue:`2159`) - By `Tom Nicholas `_. + By `Tom Nicholas `_. - :py:meth:`~xarray.DataArray.rolling_exp` and :py:meth:`~xarray.Dataset.rolling_exp` added, similar to pandas' @@ -585,12 +585,12 @@ Enhancements to existing functionality :py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now accept a keyword argument ``restore_coord_dims`` which keeps the order of the dimensions of multi-dimensional coordinates intact (:issue:`1856`). - By `Peter Hausamann `_. + By `Peter Hausamann `_. - Clean up Python 2 compatibility in code (:issue:`2950`) By `Guido Imperiale `_. - Better warning message when supplying invalid objects to ``xr.merge`` (:issue:`2948`). By `Mathias Hauser `_. -- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims` +- Add ``errors`` keyword argument to ``Dataset.drop`` and :py:meth:`Dataset.drop_dims` that allows ignoring errors if a passed label or dimension is not in the dataset (:issue:`2994`). By `Andrew Ross `_. @@ -786,7 +786,7 @@ Bug fixes `Spencer Clark `_. - Line plots with the ``x`` argument set to a non-dimensional coord now plot the correct data for 1D DataArrays. - (:issue:`2725`). By `Tom Nicholas `_. + (:issue:`2725`). By `Tom Nicholas `_. - Subtracting a scalar ``cftime.datetime`` object from a :py:class:`CFTimeIndex` now results in a :py:class:`pandas.TimedeltaIndex` instead of raising a ``TypeError`` (:issue:`2671`). By `Spencer Clark @@ -802,14 +802,14 @@ Bug fixes By `Yohai Bar-Sinai `_. - Fixed error when trying to reduce a DataArray using a function which does not require an axis argument. (:issue:`2768`) - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Concatenating a sequence of :py:class:`~xarray.DataArray` with varying names sets the name of the output array to ``None``, instead of the name of the first input array. If the names are the same it sets the name to that, instead to the name of the first DataArray in the list as it did before. - (:issue:`2775`). By `Tom Nicholas `_. + (:issue:`2775`). By `Tom Nicholas `_. -- Per `CF conventions +- Per the `CF conventions section on calendars `_, specifying ``'standard'`` as the calendar type in :py:meth:`~xarray.cftime_range` now correctly refers to the ``'gregorian'`` @@ -827,7 +827,7 @@ Bug fixes (e.g. '2000-01-01T00:00:00-05:00') no longer raises an error (:issue:`2649`). By `Spencer Clark `_. - Fixed performance regression with ``open_mfdataset`` (:issue:`2662`). - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Fixed supplying an explicit dimension in the ``concat_dim`` argument to to ``open_mfdataset`` (:issue:`2647`). By `Ben Root `_. @@ -892,13 +892,13 @@ Enhancements but were not explicitly closed. This is mostly useful for debugging; we recommend enabling it in your test suites if you use xarray for IO. By `Stephan Hoyer `_ -- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin `_. +- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin `_. - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the ``loffset`` kwarg just like Pandas. 
By `Deepak Cherian `_ - Datasets are now guaranteed to have a ``'source'`` encoding, so the source file name is always stored (:issue:`2550`). - By `Tom Nicholas `_. + By `Tom Nicholas `_. - The ``apply`` methods for ``DatasetGroupBy``, ``DataArrayGroupBy``, ``DatasetResample`` and ``DataArrayResample`` now support passing positional arguments to the applied function as a tuple to the ``args`` argument. @@ -1020,7 +1020,7 @@ Enhancements dataset and dataarray attrs upon operations. The option is set with ``xarray.set_options(keep_attrs=True)``, and the default is to use the old behaviour. - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Added a new backend for the GRIB file format based on ECMWF *cfgrib* python driver and *ecCodes* C-library. (:issue:`2475`) By `Alessandro Amici `_, @@ -1076,7 +1076,7 @@ Bug fixes CFTimeIndex is now allowed (:issue:`2484`). By `Spencer Clark `_. - Avoid use of Dask's deprecated ``get=`` parameter in tests - by `Matthew Rocklin `_. + by `Matthew Rocklin `_. - An ``OverflowError`` is now accurately raised and caught during the encoding process if a reference date is used that is so distant that the dates must be encoded using cftime rather than NumPy (:issue:`2272`). @@ -1122,7 +1122,7 @@ Enhancements (:issue:`2230`) By `Keisuke Fujii `_. -- :py:meth:`plot()` now accepts the kwargs +- :py:func:`~plot.plot()` now accepts the kwargs ``xscale, yscale, xlim, ylim, xticks, yticks`` just like Pandas. Also ``xincrease=False, yincrease=False`` now use matplotlib's axis inverting methods instead of setting limits. By `Deepak Cherian `_. (:issue:`2224`) @@ -1189,7 +1189,7 @@ Bug fixes - Follow up the renamings in dask; from dask.ghost to dask.overlap By `Keisuke Fujii `_. -- Now :py:func:`xr.apply_ufunc` raises a ValueError when the size of +- Now :py:func:`~xarray.apply_ufunc` raises a ValueError when the size of ``input_core_dims`` is inconsistent with the number of arguments. (:issue:`2341`) By `Keisuke Fujii `_. @@ -1272,7 +1272,7 @@ Enhancements - :py:meth:`~xarray.DataArray.interp` and :py:meth:`~xarray.Dataset.interp` methods are newly added. - See :ref:`interpolating values with interp` for the detail. + See :ref:`interp` for the detail. (:issue:`2079`) By `Keisuke Fujii `_. @@ -1389,7 +1389,7 @@ non-standard calendars used in climate modeling. Documentation ~~~~~~~~~~~~~ -- New FAQ entry, :ref:`faq.other_projects`. +- New FAQ entry, :ref:`related-projects`. By `Deepak Cherian `_. - :ref:`assigning_values` now includes examples on how to select and assign values to a :py:class:`~xarray.DataArray` with ``.loc``. @@ -1445,7 +1445,7 @@ Bug fixes - ``ValueError`` is raised when coordinates with the wrong size are assigned to a :py:class:`DataArray`. (:issue:`2112`) By `Keisuke Fujii `_. -- Fixed a bug in :py:meth:`~xarary.DatasArray.rolling` with bottleneck. Also, +- Fixed a bug in :py:meth:`~xarray.DataArray.rolling` with bottleneck. Also, fixed a bug in rolling an integer dask array. (:issue:`2113`) By `Keisuke Fujii `_. - Fixed a bug where `keep_attrs=True` flag was neglected if @@ -1482,7 +1482,7 @@ Enhancements supplied list, returning a bool array. See :ref:`selecting values with isin` for full details. Similar to the ``np.isin`` function. By `Maximilian Roos `_. -- Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` +- Some speed improvement to construct :py:class:`~xarray.core.rolling.DataArrayRolling` object (:issue:`1993`) By `Keisuke Fujii `_. 
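A short sketch of the ``loffset`` kwarg mentioned a few entries up, assuming the xarray of
this era (``loffset`` was later deprecated upstream in favour of plain time-offset
arithmetic on the result; the data below are invented)::

    import numpy as np
    import pandas as pd
    import xarray as xr

    times = pd.date_range("2000-01-01", periods=48, freq="H")
    da = xr.DataArray(np.arange(48.0), dims="time", coords={"time": times})

    # Daily means whose labels are shifted forward by 12 hours, as in pandas
    daily = da.resample(time="1D", loffset="12H").mean()
    print(daily["time"].values[0])  # 2000-01-01T12:00:00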
- Handle variables with different values for ``missing_value`` and @@ -1562,8 +1562,8 @@ Enhancements NumPy. By `Stephan Hoyer `_. - Improve :py:func:`~xarray.DataArray.rolling` logic. - :py:func:`~xarray.DataArrayRolling` object now supports - :py:func:`~xarray.DataArrayRolling.construct` method that returns a view + :py:func:`~xarray.core.rolling.DataArrayRolling` object now supports + :py:func:`~xarray.core.rolling.DataArrayRolling.construct` method that returns a view of the DataArray / Dataset object with the rolling-window dimension added to the last axis. This enables more flexible operation, such as strided rolling, windowed rolling, ND-rolling, short-time FFT and convolution. @@ -1634,7 +1634,7 @@ Enhancements 1D coordinate (e.g. time) and a 2D coordinate (e.g. depth as a function of time) (:issue:`1737`). By `Deepak Cherian `_. -- :py:func:`~plot()` rotates x-axis ticks if x-axis is time. +- :py:func:`~plot.plot()` rotates x-axis ticks if x-axis is time. By `Deepak Cherian `_. - :py:func:`~plot.line()` can draw multiple lines if provided with a 2D variable. @@ -1909,7 +1909,7 @@ Enhancements concatenated array/dataset (:issue:`1521`). By `Guido Imperiale `_. -- Speed-up (x 100) of :py:func:`~xarray.conventions.decode_cf_datetime`. +- Speed-up (x 100) of ``xarray.conventions.decode_cf_datetime``. By `Christian Chwala `_. **IO related improvements** @@ -2555,7 +2555,7 @@ Enhancements raising an error (:issue:`1082`). By `Stephan Hoyer `_. - Options for axes sharing between subplots are exposed to - :py:class:`FacetGrid` and :py:func:`~xarray.plot.plot`, so axes + :py:class:`~xarray.plot.FacetGrid` and :py:func:`~xarray.plot.plot`, so axes sharing can be disabled for polar plots. By `Bas Hoonhout `_. - New utility functions :py:func:`~xarray.testing.assert_equal`, @@ -2571,8 +2571,8 @@ Enhancements similar to what the command line utility ``ncdump -h`` produces (:issue:`1150`). By `Joe Hamman `_. - Added the ability write unlimited netCDF dimensions with the ``scipy`` and - ``netcdf4`` backends via the new :py:attr:`~xray.Dataset.encoding` attribute - or via the ``unlimited_dims`` argument to :py:meth:`~xray.Dataset.to_netcdf`. + ``netcdf4`` backends via the new ``xray.Dataset.encoding`` attribute + or via the ``unlimited_dims`` argument to ``xray.Dataset.to_netcdf``. By `Joe Hamman `_. - New :py:meth:`~DataArray.quantile` method to calculate quantiles from DataArray objects (:issue:`1187`). @@ -2651,10 +2651,9 @@ Bug fixes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- :py:meth:`~xarray.Dataset.isel_points` and - :py:meth:`~xarray.Dataset.sel_points` now use vectorised indexing in numpy - and dask (:issue:`1161`), which can result in several orders of magnitude - speedup. +- ``xarray.Dataset.isel_points`` and ``xarray.Dataset.sel_points`` now + use vectorised indexing in numpy and dask (:issue:`1161`), which can + result in several orders of magnitude speedup. By `Jonathan Chambers `_. .. _whats-new.0.8.2: @@ -2763,16 +2762,17 @@ Enhancements any number of ``Dataset`` and/or ``DataArray`` variables. See :ref:`merge` for more details. By `Stephan Hoyer `_. -- DataArray and Dataset method :py:meth:`resample` now supports the +- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now support the ``keep_attrs=False`` option that determines whether variable and dataset attributes are retained in the resampled object. By `Jeremy McGibbon `_. 
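The rolling ``construct`` method described a few entries above, in brief (a sketch with
invented data; the ``window`` dimension name is the caller's choice)::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(8.0), dims="time")

    # A strided view with the rolling window materialised as a new dimension
    windowed = da.rolling(time=3).construct("window")
    print(windowed.shape)                # (8, 3)
    print(windowed.isel(time=2).values)  # [0. 1. 2.]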
-- Better multi-index support in DataArray and Dataset :py:meth:`sel` and - :py:meth:`loc` methods, which now behave more closely to pandas and which - also accept dictionaries for indexing based on given level names and labels - (see :ref:`multi-level indexing`). By - `Benoit Bovy `_. +- Better multi-index support in :py:meth:`DataArray.sel`, + :py:meth:`DataArray.loc`, :py:meth:`Dataset.sel` and + :py:meth:`Dataset.loc`, which now behave more closely to pandas and + which also accept dictionaries for indexing based on given level names + and labels (see :ref:`multi-level indexing`). + By `Benoit Bovy `_. - New (experimental) decorators :py:func:`~xarray.register_dataset_accessor` and :py:func:`~xarray.register_dataarray_accessor` for registering custom xarray @@ -2788,7 +2788,7 @@ Enhancements allowing more control on the colorbar (:issue:`872`). By `Fabien Maussion `_. -- New Dataset method :py:meth:`filter_by_attrs`, akin to +- New Dataset method :py:meth:`Dataset.filter_by_attrs`, akin to ``netCDF4.Dataset.get_variables_by_attributes``, to easily filter data variables using its attributes. `Filipe Fernandes `_. @@ -2915,7 +2915,7 @@ Enhancements - Numerical operations now return empty objects on no overlapping labels rather than raising ``ValueError`` (:issue:`739`). -- :py:class:`~pd.Series` is now supported as valid input to the ``Dataset`` +- :py:class:`~pandas.Series` is now supported as valid input to the ``Dataset`` constructor (:issue:`740`). Bug fixes @@ -2934,7 +2934,7 @@ Bug fixes reindexing leads to NaN values (:issue:`738`). - ``Dataset.rename`` and ``DataArray.rename`` support the old and new names being the same (:issue:`724`). -- Fix :py:meth:`~xarray.Dataset.from_dataset` for DataFrames with Categorical +- Fix :py:meth:`~xarray.Dataset.from_dataframe` for DataFrames with Categorical column and a MultiIndex index (:issue:`737`). - Fixes to ensure xarray works properly after the upcoming pandas v0.18 and NumPy v1.11 releases. @@ -2985,7 +2985,7 @@ recommend switching your import statements to ``import xarray as xr``. Breaking changes ~~~~~~~~~~~~~~~~ -- The internal data model used by :py:class:`~xray.DataArray` has been +- The internal data model used by ``xray.DataArray`` has been rewritten to fix several outstanding issues (:issue:`367`, :issue:`634`, `this stackoverflow report`_). Internally, ``DataArray`` is now implemented in terms of ``._variable`` and ``._coords`` attributes instead of holding @@ -3023,7 +3023,7 @@ Breaking changes * x (x) int64 0 1 2 - It is no longer possible to convert a DataArray to a Dataset with - :py:meth:`xray.DataArray.to_dataset` if it is unnamed. This will now + ``xray.DataArray.to_dataset`` if it is unnamed. This will now raise ``ValueError``. If the array is unnamed, you need to supply the ``name`` argument. @@ -3092,7 +3092,7 @@ Enhancements - Plotting: more control on colormap parameters (:issue:`642`). ``vmin`` and ``vmax`` will not be silently ignored anymore. Setting ``center=False`` prevents automatic selection of a divergent colormap. -- New :py:meth:`~xray.Dataset.shift` and :py:meth:`~xray.Dataset.roll` methods +- New ``xray.Dataset.shift`` and ``xray.Dataset.roll`` methods for shifting/rotating datasets or arrays along a dimension: .. ipython:: python @@ -3106,9 +3106,9 @@ Enhancements moves both data and coordinates. - Assigning a ``pandas`` object directly as a ``Dataset`` variable is now permitted. Its index names correspond to the ``dims`` of the ``Dataset``, and its data is aligned. 
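A compact sketch of the ``shift``/``roll`` semantics introduced above, written against the
modern ``xarray`` namespace rather than the historical ``xray`` one (``roll_coords`` is the
later, explicit spelling of the original ``roll`` behaviour)::

    import xarray as xr

    ds = xr.Dataset({"foo": ("x", [1, 2, 3, 4])}, coords={"x": [0, 1, 2, 3]})

    print(ds.shift(x=1)["foo"].values)                   # [nan 1. 2. 3.] -- pads with NaN
    print(ds.roll(x=1, roll_coords=True)["foo"].values)  # [4 1 2 3]      -- wraps around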
-- Passing a :py:class:`pandas.DataFrame` or :py:class:`pandas.Panel` to a Dataset constructor +- Passing a :py:class:`pandas.DataFrame` or ``pandas.Panel`` to a Dataset constructor is now permitted. -- New function :py:func:`~xray.broadcast` for explicitly broadcasting +- New function ``xray.broadcast`` for explicitly broadcasting ``DataArray`` and ``Dataset`` objects against each other. For example: .. ipython:: python @@ -3166,7 +3166,7 @@ API Changes ~~~~~~~~~~~ - The handling of colormaps and discrete color lists for 2D plots in - :py:meth:`~xray.DataArray.plot` was changed to provide more compatibility + ``xray.DataArray.plot`` was changed to provide more compatibility with matplotlib's ``contour`` and ``contourf`` functions (:issue:`538`). Now discrete lists of colors should be specified using ``colors`` keyword, rather than ``cmap``. @@ -3174,10 +3174,10 @@ API Changes Enhancements ~~~~~~~~~~~~ -- Faceted plotting through :py:class:`~xray.plot.FacetGrid` and the - :py:meth:`~xray.plot.plot` method. See :ref:`plotting.faceting` for more details +- Faceted plotting through ``xray.plot.FacetGrid`` and the + ``xray.plot.plot`` method. See :ref:`plotting.faceting` for more details and examples. -- :py:meth:`~xray.Dataset.sel` and :py:meth:`~xray.Dataset.reindex` now support +- ``xray.Dataset.sel`` and ``xray.Dataset.reindex`` now support the ``tolerance`` argument for controlling nearest-neighbor selection (:issue:`629`): @@ -3194,12 +3194,12 @@ Enhancements * x (x) float64 0.9 1.5 This feature requires pandas v0.17 or newer. -- New ``encoding`` argument in :py:meth:`~xray.Dataset.to_netcdf` for writing +- New ``encoding`` argument in ``xray.Dataset.to_netcdf`` for writing netCDF files with compression, as described in the new documentation section on :ref:`io.netcdf.writing_encoded`. -- Add :py:attr:`~xray.Dataset.real` and :py:attr:`~xray.Dataset.imag` +- Add ``xray.Dataset.real`` and ``xray.Dataset.imag`` attributes to Dataset and DataArray (:issue:`553`). -- More informative error message with :py:meth:`~xray.Dataset.from_dataframe` +- More informative error message with ``xray.Dataset.from_dataframe`` if the frame has duplicate columns. - xray now uses deterministic names for dask arrays it creates or opens from disk. This allows xray users to take advantage of dask's nascent support for @@ -3214,9 +3214,9 @@ Bug fixes - Aggregation functions now correctly skip ``NaN`` for data for ``complex128`` dtype (:issue:`554`). - Fixed indexing 0d arrays with unicode dtype (:issue:`568`). -- :py:meth:`~xray.DataArray.name` and Dataset keys must be a string or None to +- ``xray.DataArray.name`` and Dataset keys must be a string or None to be written to netCDF (:issue:`533`). -- :py:meth:`~xray.DataArray.where` now uses dask instead of numpy if either the +- ``xray.DataArray.where`` now uses dask instead of numpy if either the array or ``other`` is a dask array. Previously, if ``other`` was a numpy array the method was evaluated eagerly. - Global attributes are now handled more consistently when loading remote @@ -3243,24 +3243,24 @@ v0.6.0 (21 August 2015) This release includes numerous bug fixes and enhancements. Highlights include the introduction of a plotting module and the new Dataset and DataArray -methods :py:meth:`~xray.Dataset.isel_points`, :py:meth:`~xray.Dataset.sel_points`, -:py:meth:`~xray.Dataset.where` and :py:meth:`~xray.Dataset.diff`. There are no +methods ``xray.Dataset.isel_points``, ``xray.Dataset.sel_points``, +``xray.Dataset.where`` and ``xray.Dataset.diff``. 
There are no breaking changes from v0.5.2. Enhancements ~~~~~~~~~~~~ - Plotting methods have been implemented on DataArray objects - :py:meth:`~xray.DataArray.plot` through integration with matplotlib + ``xray.DataArray.plot`` through integration with matplotlib (:issue:`185`). For an introduction, see :ref:`plotting`. - Variables in netCDF files with multiple missing values are now decoded as NaN after issuing a warning if open_dataset is called with mask_and_scale=True. - We clarified our rules for when the result from an xray operation is a copy - vs. a view (see :ref:`copies vs views` for more details). + vs. a view (see :ref:`copies_vs_views` for more details). - Dataset variables are now written to netCDF files in order of appearance when using the netcdf4 backend (:issue:`479`). -- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.Dataset.sel_points` +- Added ``xray.Dataset.isel_points`` and ``xray.Dataset.sel_points`` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). .. ipython:: @@ -3305,7 +3305,7 @@ Enhancements x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 -- New :py:meth:`~xray.Dataset.where` method for masking xray objects according +- New ``xray.Dataset.where`` method for masking xray objects according to some criteria. This works particularly well with multi-dimensional data: .. ipython:: python @@ -3316,11 +3316,10 @@ Enhancements @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance < 100).plot() -- Added new methods :py:meth:`DataArray.diff ` - and :py:meth:`Dataset.diff ` for finite - difference calculations along a given axis. +- Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` + for finite difference calculations along a given axis. -- New :py:meth:`~xray.DataArray.to_masked_array` convenience method for +- New ``xray.DataArray.to_masked_array`` convenience method for returning a numpy.ma.MaskedArray. .. ipython:: python @@ -3329,7 +3328,7 @@ Enhancements da.where(da < 0.5) da.where(da < 0.5).to_masked_array(copy=True) -- Added new flag "drop_variables" to :py:meth:`~xray.open_dataset` for +- Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop variables with problems or inconsistent values. @@ -3358,7 +3357,7 @@ options for ``xray.concat``. Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- The optional arguments ``concat_over`` and ``mode`` in :py:func:`~xray.concat` have +- The optional arguments ``concat_over`` and ``mode`` in ``xray.concat`` have been removed and replaced by ``data_vars`` and ``coords``. The new arguments are both more easily understood and more robustly implemented, and allowed us to fix a bug where ``concat`` accidentally loaded data into memory. If you set values for @@ -3368,16 +3367,16 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- :py:func:`~xray.open_mfdataset` now supports a ``preprocess`` argument for +- ``xray.open_mfdataset`` now supports a ``preprocess`` argument for preprocessing datasets prior to concatenaton. This is useful if datasets cannot be otherwise merged automatically, e.g., if the original datasets have conflicting index coordinates (:issue:`443`). -- :py:func:`~xray.open_dataset` and :py:func:`~xray.open_mfdataset` now use a +- ``xray.open_dataset`` and ``xray.open_mfdataset`` now use a global thread lock by default for reading from netCDF files with dask. 
This avoids possible segmentation faults for reading from netCDF4 files when HDF5 is not configured properly for concurrent access (:issue:`444`). - Added support for serializing arrays of complex numbers with `engine='h5netcdf'`. -- The new :py:func:`~xray.save_mfdataset` function allows for saving multiple +- The new ``xray.save_mfdataset`` function allows for saving multiple datasets to disk simultaneously. This is useful when processing large datasets with dask.array. For example, to save a dataset too big to fit into memory to one file per year, we could write: @@ -3396,7 +3395,7 @@ Bug fixes - Fixed ``min``, ``max``, ``argmin`` and ``argmax`` for arrays with string or unicode types (:issue:`453`). -- :py:func:`~xray.open_dataset` and :py:func:`~xray.open_mfdataset` support +- ``xray.open_dataset`` and ``xray.open_mfdataset`` support supplying chunks as a single integer. - Fixed a bug in serializing scalar datetime variable to netCDF. - Fixed a bug that could occur in serialization of 0-dimensional integer arrays. @@ -3413,9 +3412,9 @@ adds the ``pipe`` method, copied from pandas. Enhancements ~~~~~~~~~~~~ -- Added :py:meth:`~xray.Dataset.pipe`, replicating the `new pandas method`_ in version +- Added ``xray.Dataset.pipe``, replicating the `new pandas method`_ in version 0.16.2. See :ref:`transforming datasets` for more details. -- :py:meth:`~xray.Dataset.assign` and :py:meth:`~xray.Dataset.assign_coords` +- ``xray.Dataset.assign`` and ``xray.Dataset.assign_coords`` now assign new variables in sorted (alphabetical) order, mirroring the behavior in pandas. Previously, the order was arbitrary. @@ -3437,7 +3436,7 @@ Highlights The headline feature in this release is experimental support for out-of-core computing (data that doesn't fit into memory) with dask_. This includes a new -top-level function :py:func:`~xray.open_mfdataset` that makes it easy to open +top-level function ``xray.open_mfdataset`` that makes it easy to open a collection of netCDF (using dask) as a single ``xray.Dataset`` object. For more on dask, read the `blog post introducing xray + dask`_ and the new documentation section :doc:`dask`. @@ -3452,7 +3451,7 @@ Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The logic used for choosing which variables are concatenated with - :py:func:`~xray.concat` has changed. Previously, by default any variables + ``xray.concat`` has changed. Previously, by default any variables which were equal across a dimension were not concatenated. This lead to some surprising behavior, where the behavior of groupby and concat operations could depend on runtime values (:issue:`268`). For example: @@ -3487,8 +3486,8 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- New :py:meth:`~xray.Dataset.to_array` and enhanced - :py:meth:`~xray.DataArray.to_dataset` methods make it easy to switch back +- New ``xray.Dataset.to_array`` and enhanced + ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: .. ipython:: python @@ -3498,7 +3497,7 @@ Enhancements ds.to_array() ds.to_array().to_dataset(dim='variable') -- New :py:meth:`~xray.Dataset.fillna` method to fill missing values, modeled +- New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. ipython:: python @@ -3510,7 +3509,7 @@ Enhancements index based alignment and broadcasting like standard binary operations. It also can be applied by group, as illustrated in :ref:`fill with climatology`. 
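The ``fillna`` entry above, condensed into a sketch (values invented; note that filling
with an aligned xarray object broadcasts by dimension name)::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", [np.nan, 1.0, np.nan, 3.0])})

    print(ds.fillna(0.0)["a"].values)        # [0. 1. 0. 3.]
    print(ds.fillna(ds.mean())["a"].values)  # [2. 1. 2. 3.] -- filled with the mean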
-- New :py:meth:`~xray.Dataset.assign` and :py:meth:`~xray.Dataset.assign_coords` +- New ``xray.Dataset.assign`` and ``xray.Dataset.assign_coords`` methods patterned off the new :py:meth:`DataFrame.assign ` method in pandas: @@ -3522,8 +3521,8 @@ Enhancements These methods return a new Dataset (or DataArray) with updated data or coordinate variables. -- :py:meth:`~xray.Dataset.sel` now supports the ``method`` parameter, which works - like the paramter of the same name on :py:meth:`~xray.Dataset.reindex`. It +- ``xray.Dataset.sel`` now supports the ``method`` parameter, which works + like the paramter of the same name on ``xray.Dataset.reindex``. It provides a simple interface for doing nearest-neighbor interpolation: .. use verbatim because I can't seem to install pandas 0.16.1 on RTD :( @@ -3560,7 +3559,7 @@ Enhancements - Accessing data from remote datasets now has retrying logic (with exponential backoff) that should make it robust to occasional bad responses from DAP servers. -- You can control the width of the Dataset repr with :py:class:`xray.set_options`. +- You can control the width of the Dataset repr with ``xray.set_options``. It can be used either as a context manager, in which case the default is restored outside the context: @@ -3586,7 +3585,7 @@ Deprecations ~~~~~~~~~~~~ - The method ``load_data()`` has been renamed to the more succinct - :py:meth:`~xray.Dataset.load`. + ``xray.Dataset.load``. v0.4.1 (18 March 2015) ---------------------- @@ -3599,7 +3598,7 @@ Enhancements - New documentation sections on :ref:`time-series` and :ref:`combining multiple files`. -- :py:meth:`~xray.Dataset.resample` lets you resample a dataset or data array to +- ``xray.Dataset.resample`` lets you resample a dataset or data array to a new temporal resolution. The syntax is the `same as pandas`_, except you need to supply the time dimension explicitly: @@ -3642,7 +3641,7 @@ Enhancements array.resample('1D', dim='time', how='first') -- :py:meth:`~xray.Dataset.swap_dims` allows for easily swapping one dimension +- ``xray.Dataset.swap_dims`` allows for easily swapping one dimension out for another: .. ipython:: python @@ -3652,7 +3651,7 @@ Enhancements ds.swap_dims({'x': 'y'}) This was possible in earlier versions of xray, but required some contortions. -- :py:func:`~xray.open_dataset` and :py:meth:`~xray.Dataset.to_netcdf` now +- ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now accept an ``engine`` argument to explicitly select which underlying library (netcdf4 or scipy) is used for reading/writing a netCDF file. @@ -3687,7 +3686,7 @@ Breaking changes - We now automatically align index labels in arithmetic, dataset construction, merging and updating. This means the need for manually invoking methods like - :py:func:`~xray.align` and :py:meth:`~xray.Dataset.reindex_like` should be + ``xray.align`` and ``xray.Dataset.reindex_like`` should be vastly reduced. :ref:`For arithmetic`, we align @@ -3739,7 +3738,7 @@ Breaking changes (a + b).coords This functionality can be controlled through the ``compat`` option, which - has also been added to the :py:class:`~xray.Dataset` constructor. + has also been added to the ``xray.Dataset`` constructor. - Datetime shortcuts such as ``'time.month'`` now return a ``DataArray`` with the name ``'month'``, not ``'time.month'`` (:issue:`345`). This makes it easier to index the resulting arrays when they are used with ``groupby``: @@ -3777,7 +3776,7 @@ Breaking changes Enhancements ~~~~~~~~~~~~ -- Support for :py:meth:`~xray.Dataset.reindex` with a fill method. 
This +- Support for ``xray.Dataset.reindex`` with a fill method. This provides a useful shortcut for upsampling: .. ipython:: python @@ -3791,16 +3790,15 @@ Enhancements - Use functions that return generic ndarrays with DataArray.groupby.apply and Dataset.apply (:issue:`327` and :issue:`329`). Thanks Jeff Gerard! - Consolidated the functionality of ``dumps`` (writing a dataset to a netCDF3 - bytestring) into :py:meth:`~xray.Dataset.to_netcdf` (:issue:`333`). -- :py:meth:`~xray.Dataset.to_netcdf` now supports writing to groups in netCDF4 + bytestring) into ``xray.Dataset.to_netcdf`` (:issue:`333`). +- ``xray.Dataset.to_netcdf`` now supports writing to groups in netCDF4 files (:issue:`333`). It also finally has a full docstring -- you should read it! -- :py:func:`~xray.open_dataset` and :py:meth:`~xray.Dataset.to_netcdf` now +- ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now work on netCDF3 files when netcdf4-python is not installed as long as scipy is available (:issue:`333`). -- The new :py:meth:`Dataset.drop ` and - :py:meth:`DataArray.drop ` methods makes it easy to drop - explicitly listed variables or index labels: +- The new ``xray.Dataset.drop`` and ``xray.DataArray.drop`` methods + makes it easy to drop explicitly listed variables or index labels: .. ipython:: python :okwarning: @@ -3813,7 +3811,7 @@ Enhancements arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) arr.drop(['a', 'c'], dim='x') -- :py:meth:`~xray.Dataset.broadcast_equals` has been added to correspond to +- ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. - Long attributes are now truncated at 500 characters when printing a dataset (:issue:`338`). This should make things more convenient for working with @@ -3839,8 +3837,8 @@ Deprecations ~~~~~~~~~~~~ - ``dump`` and ``dumps`` have been deprecated in favor of - :py:meth:`~xray.Dataset.to_netcdf`. -- ``drop_vars`` has been deprecated in favor of :py:meth:`~xray.Dataset.drop`. + ``xray.Dataset.to_netcdf``. +- ``drop_vars`` has been deprecated in favor of ``xray.Dataset.drop``. Future plans ~~~~~~~~~~~~ @@ -3970,10 +3968,10 @@ backwards incompatible changes. New features ~~~~~~~~~~~~ -- Added :py:meth:`~xray.Dataset.count` and :py:meth:`~xray.Dataset.dropna` +- Added ``xray.Dataset.count`` and ``xray.Dataset.dropna`` methods, copied from pandas, for working with missing values (:issue:`247`, :issue:`58`). -- Added :py:meth:`DataArray.to_pandas ` for +- Added ``xray.DataArray.to_pandas`` for converting a data array into the pandas object with the same dimensionality (1D to Series, 2D to DataFrame, etc.) (:issue:`255`). - Support for reading gzipped netCDF3 files (:issue:`239`). @@ -4006,7 +4004,7 @@ New features of arrays of metadata that describe the grid on which the points in "variable" arrays lie. They are preserved (when unambiguous) even though mathematical operations. -- **Dataset math** :py:class:`~xray.Dataset` objects now support all arithmetic +- **Dataset math** ``xray.Dataset`` objects now support all arithmetic operations directly. Dataset-array operations map across all dataset variables; dataset-dataset operations act on each pair of variables with the same name. @@ -4022,7 +4020,7 @@ Backwards incompatible changes - ``Dataset.__eq__`` and ``Dataset.__ne__`` are now element-wise operations instead of comparing all values to obtain a single boolean. Use the method - :py:meth:`~xray.Dataset.equals` instead. + ``xray.Dataset.equals`` instead. 
Deprecations ~~~~~~~~~~~~ @@ -4031,7 +4029,7 @@ Deprecations - ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of variable names instead. - ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use - :py:meth:`~xray.DataArray.reset_coords` instead. + ``xray.DataArray.reset_coords`` instead. v0.2 (14 August 2014) --------------------- @@ -4041,16 +4039,16 @@ fixes. Here are the highlights: - There is now a direct constructor for ``DataArray`` objects, which makes it possible to create a DataArray without using a Dataset. This is highlighted - in the refreshed :doc:`tutorial`. + in the refreshed ``tutorial``. - You can perform aggregation operations like ``mean`` directly on - :py:class:`~xray.Dataset` objects, thanks to Joe Hamman. These aggregation + ``xray.Dataset`` objects, thanks to Joe Hamman. These aggregation methods also worked on grouped datasets. - xray now works on Python 2.6, thanks to Anna Kuznetsova. - A number of methods and attributes were given more sensible (usually shorter) names: ``labeled`` -> ``sel``, ``indexed`` -> ``isel``, ``select`` -> ``select_vars``, ``unselect`` -> ``drop_vars``, ``dimensions`` -> ``dims``, ``coordinates`` -> ``coords``, ``attributes`` -> ``attrs``. -- New :py:meth:`~xray.Dataset.load_data` and :py:meth:`~xray.Dataset.close` +- New ``xray.Dataset.load_data`` and ``xray.Dataset.close`` methods for datasets facilitate lower level of control of data loaded from disk. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 945b3937c43..23d09ba5e33 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -729,13 +729,13 @@ def open_mfdataset( ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated) ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or ``combine='nested'`` in future. Requires dask to be installed. See documentation for - details on dask [1]. Attributes from the first dataset file are used for the + details on dask [1]_. Attributes from the first dataset file are used for the combined dataset. Parameters ---------- paths : str or sequence - Either a string glob in the form "path/to/my/files/*.nc" or an explicit list of + Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of files to open. Paths can be given as strings or as pathlib Paths. If concatenation along more than one dimension is desired, then ``paths`` must be a nested list-of-lists (see ``manual_combine`` for details). (A string glob will @@ -745,7 +745,7 @@ def open_mfdataset( In general, these should divide the dimensions of each dataset. If int, chunk each dimension by ``chunks``. By default, chunks will be chosen to load entire input files into memory at once. This has a major impact on performance: please - see the full documentation for more details [2]. + see the full documentation for more details [2]_. concat_dim : str, or list of str, DataArray, Index or None, optional Dimensions to concatenate files along. You only need to provide this argument if any of the dimensions along which you want to concatenate is not a dimension @@ -761,6 +761,7 @@ def open_mfdataset( 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts when merging: + * 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. * 'equals': all values and dimensions must be the same. @@ -770,6 +771,7 @@ def open_mfdataset( must be equal. 
The returned dataset then contains the combination of all non-null values. * 'override': skip comparing and pick variable from first dataset + preprocess : callable, optional If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 559c5e16287..4005d4fbf6d 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -506,7 +506,7 @@ def strftime(self, date_format): Returns ------- - Index + pandas.Index Index of formatted strings Examples diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 41ff5a3b32d..b820d215d2f 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -108,7 +108,7 @@ def align( Returns ------- - aligned : same as *objects + aligned : same as `*objects` Tuple of objects with aligned coordinates. Raises diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 3308dcef285..b9db30a9f92 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -531,6 +531,7 @@ def combine_by_coords( * 'all': All data variables will be concatenated. * list of str: The listed data variables will be concatenated, in addition to the 'minimal' data variables. + If objects are DataArrays, `data_vars` must be 'all'. coords : {'minimal', 'different', 'all' or list of str}, optional As per the 'data_vars' kwarg, but for coordinate variables. @@ -747,6 +748,7 @@ def auto_combine( 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: + - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. diff --git a/xarray/core/common.py b/xarray/core/common.py index 2afe4b4c3a7..a74318b2f90 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -91,15 +91,23 @@ def wrapped_func(self, dim=None, **kwargs): # type: ignore return wrapped_func - _reduce_extra_args_docstring = """dim : str or sequence of str, optional + _reduce_extra_args_docstring = dedent( + """ + dim : str or sequence of str, optional Dimension(s) over which to apply `{name}`. By default `{name}` is - applied over all dimensions.""" + applied over all dimensions. + """ + ).strip() - _cum_extra_args_docstring = """dim : str or sequence of str, optional + _cum_extra_args_docstring = dedent( + """ + dim : str or sequence of str, optional Dimension over which to apply `{name}`. axis : int or sequence of int, optional Axis over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied.""" + and 'axis' arguments can be supplied. + """ + ).strip() class AbstractArray(ImplementsArrayReduce): @@ -454,7 +462,7 @@ def assign_coords(self, coords=None, **coords_kwargs): def assign_attrs(self, *args, **kwargs): """Assign new attrs to this object. - Returns a new object equivalent to self.attrs.update(*args, **kwargs). + Returns a new object equivalent to ``self.attrs.update(*args, **kwargs)``. Parameters ---------- @@ -481,7 +489,7 @@ def pipe( **kwargs, ) -> T: """ - Apply func(self, *args, **kwargs) + Apply ``func(self, *args, **kwargs)`` This method replicates the pandas method of the same name. 
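The ``open_mfdataset`` parameters documented above combine roughly as in this sketch (the glob and the variable name ``t2m`` are hypothetical; ``chunks`` requires dask to be installed):

```python
import xarray as xr

ds = xr.open_mfdataset(
    "path/to/my/files/*.nc",            # hypothetical glob
    combine="by_coords",                # opt in to the non-deprecated combiner
    compat="no_conflicts",              # one of the options listed above
    preprocess=lambda d: d[["t2m"]],    # applied to each file before combining
    chunks={"time": 100},               # requires dask
)
```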
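Likewise, the ``align`` return value clarified above ("same as ``*objects``") means a tuple you can unpack, one new object per input, as in this small sketch:

```python
import xarray as xr

a = xr.DataArray([1, 2, 3], coords=[("x", [0, 1, 2])])
b = xr.DataArray([10, 20, 30], coords=[("x", [1, 2, 3])])

# A tuple of new objects of the same types as the inputs.
a2, b2 = xr.align(a, b, join="inner")
list(a2.x.values)  # [1, 2] -- only the shared labels remain
```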
@@ -810,6 +818,7 @@ def rolling_exp( ---------- window : A single mapping from a dimension name to window value, optional + dim : str Name of the dimension to create the rolling exponential window along (e.g., `time`). @@ -848,6 +857,7 @@ def coarsen( ---------- dim: dict, optional Mapping from the dimension name to the window size. + dim : str Name of the dimension to create the rolling iterator along (e.g., `time`). @@ -858,7 +868,7 @@ def coarsen( multiple of the window size. If 'trim', the excess entries are dropped. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' - coord_func: function (name) that is applied to the coordintes, + coord_func : function (name) that is applied to the coordintes, or a mapping from coordinate name to function (name). Returns @@ -921,7 +931,7 @@ def resample( Parameters ---------- indexer : {dim: freq}, optional - Mapping from the dimension name to resample frequency. The + Mapping from the dimension name to resample frequency [1]_. The dimension must be datetime-like. skipna : bool, optional Whether to skip missing values when aggregating in downsampling. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index bb5ab07d8dd..f8e4914e57b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -947,7 +947,7 @@ def earth_mover_distance(first_samples, appropriately for use in `apply`. You may find helper functions such as numpy.broadcast_arrays helpful in writing your function. `apply_ufunc` also works well with numba's vectorize and guvectorize. Further explanation with - examples are provided in the xarray documentation [3]. + examples are provided in the xarray documentation [3]_. See also -------- diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 5b4fc078236..5ccbfa3f2b4 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -45,6 +45,7 @@ def concat( * 'all': All data variables will be concatenated. * list of str: The listed data variables will be concatenated, in addition to the 'minimal' data variables. + If objects are DataArrays, data_vars must be 'all'. coords : {'minimal', 'different', 'all' or list of str}, optional These coordinate variables will be concatenated together: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 23342fc5e0d..1205362ad91 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -239,7 +239,7 @@ class DataArray(AbstractArray, DataWithCoords): ---------- dims : tuple Dimension names associated with this array. - values : np.ndarray + values : numpy.ndarray Access or modify DataArray values as a numpy array. coords : dict-like Dictionary of DataArray objects that label values along each dimension. @@ -1315,7 +1315,7 @@ def interp( values. kwargs: dictionary Additional keyword passed to scipy's interpolator. - **coords_kwarg : {dim: coordinate, ...}, optional + ``**coords_kwarg`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -2044,6 +2044,7 @@ def interpolate_na( provided. - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. 
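A minimal usage sketch of the ``coarsen`` parameters documented above (``boundary`` and ``coord_func``; the data are hypothetical):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(10.0), dims="time", coords={"time": np.arange(10)})

# Window of 3 along "time": boundary="trim" silently drops the one
# leftover entry, and coord_func="min" labels each window with the
# smallest time value in the window instead of the mean.
da.coarsen(time=3, boundary="trim", coord_func="min").mean()
```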
If False, values are treated as if @@ -2063,6 +2064,7 @@ def interpolate_na( - a string that is valid input for pandas.to_timedelta - a :py:class:`numpy.timedelta64` object - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled dimensions has not been implemented yet. Gap length is defined as the difference between coordinate values at the first data point after a gap and the last value @@ -2946,7 +2948,7 @@ def quantile( is a scalar. If multiple percentiles are given, first axis of the result corresponds to the quantile and a quantile dimension is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. + dimensions that remain after the reduction of the array. See Also -------- @@ -3071,8 +3073,8 @@ def integrate( Coordinate(s) used for the integration. datetime_unit: str, optional Can be used to specify the unit if datetime coordinate is used. - One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', - 'ps', 'fs', 'as'} + One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', + 'fs', 'as'} Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c631a4c11ea..5b9663c2453 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1509,7 +1509,7 @@ def to_netcdf( Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., ``{'my_variable': {'dtype': 'int16', 'scale_factor': 0.1, - 'zlib': True}, ...}`` + 'zlib': True}, ...}`` The `h5netcdf` engine supports both the NetCDF4-style compression encoding parameters ``{'zlib': True, 'complevel': 9}`` and the h5py @@ -2118,7 +2118,7 @@ def thin( indexers: Union[Mapping[Hashable, int], int] = None, **indexers_kwargs: Any, ) -> "Dataset": - """Returns a new dataset with each array indexed along every `n`th + """Returns a new dataset with each array indexed along every `n`-th value for the specified dimension(s) Parameters @@ -2127,7 +2127,7 @@ def thin( A dict with keys matching dimensions and integer values `n` or a single integer `n` applied over all dimensions. One of indexers or indexers_kwargs must be provided. - **indexers_kwargs : {dim: n, ...}, optional + ``**indexers_kwargs`` : {dim: n, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -3476,6 +3476,7 @@ def merge( 'no_conflicts'}, optional String indicating how to compare variables of the same name for potential conflicts: + - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. @@ -3484,6 +3485,7 @@ def merge( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional Method for joining ``self`` and ``other`` along shared dimensions: @@ -3624,7 +3626,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. **labels_kwargs : {dim: label, ...}, optional - The keyword arguments form of ``dim`` and ``labels` + The keyword arguments form of ``dim`` and ``labels`` Returns ------- @@ -3914,6 +3916,7 @@ def interpolate_na( ---------- dim : str Specifies the dimension along which to interpolate. 
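The nested ``encoding`` mapping shown in the ``to_netcdf`` docstring above looks like this in use (a sketch; assumes a netCDF4-compatible engine is installed and ``example.nc`` is a throwaway path):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"my_variable": ("x", np.arange(4.0))})

# Pack the floats into int16 on disk; the zlib option assumes the
# netCDF4 or h5netcdf engine is available.
ds.to_netcdf(
    "example.nc",
    encoding={"my_variable": {"dtype": "int16", "scale_factor": 0.1, "zlib": True}},
)
```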
+ method : str, optional String indicating which method to use for interpolation: @@ -3925,6 +3928,7 @@ def interpolate_na( provided. - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if @@ -3944,6 +3948,7 @@ def interpolate_na( - a string that is valid input for pandas.to_timedelta - a :py:class:`numpy.timedelta64` object - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled dimensions has not been implemented yet. Gap length is defined as the difference between coordinate values at the first data point after a gap and the last value @@ -5251,7 +5256,7 @@ def integrate(self, coord, datetime_unit=None): datetime_unit Can be specify the unit if datetime coordinate is used. One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', - 'as'} + 'as'} Returns ------- diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 38ecc04534a..ec752721781 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -573,6 +573,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): This optional parameter specifies the interpolation method to use when the desired quantile lies between two data points ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is the fractional part of the index surrounded by ``i`` and ``j``. @@ -728,17 +729,19 @@ def map(self, func, shortcut=False, args=(), **kwargs): Callable to apply to each array. shortcut : bool, optional Whether or not to shortcut evaluation under the assumptions that: + (1) The action of `func` does not depend on any of the array metadata (attributes or coordinates) but only on the data and dimensions. (2) The action of `func` creates arrays with homogeneous metadata, that is, with the same dimensions and attributes. + If these conditions are satisfied `shortcut` provides significant speedup. This should be the case for many common groupby operations (e.g., applying numpy ufuncs). - args : tuple, optional + ``*args`` : tuple, optional Positional arguments passed to `func`. - **kwargs + ``**kwargs`` Used to call `func(ar, **kwargs)` for each array `ar`. Returns diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 5c754c3f49b..16a4943627e 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -269,7 +269,7 @@ def line( if None, use the default for the matplotlib function. add_legend : boolean, optional Add legend with y axis coordinates (2D inputs only). - *args, **kwargs : optional + ``*args``, ``**kwargs`` : optional Additional arguments to matplotlib.pyplot.plot """ # Handle facetgrids first diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index 0f6fc3b1334..ae2c5c574b6 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -13,6 +13,7 @@ Once NumPy 1.10 comes out with support for overriding ufuncs, this module will hopefully no longer be necessary. 
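A short sketch of the groupby ``quantile`` behaviour documented above (hypothetical data; ``interpolation="linear"`` is the default spelled out in the docstring):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6.0),
    dims="x",
    coords={"label": ("x", ["a", "a", "a", "b", "b", "b"])},
)

# One value per group; the scalar q shows up as a "quantile" coordinate.
da.groupby("label").quantile(0.5, interpolation="linear")
```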
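As a usage sketch for the module described above: the wrapped ufuncs behave like their numpy counterparts while preserving xarray metadata (assumes an xarray of this vintage, where ``xarray.ufuncs`` still exists):

```python
import numpy as np
import xarray as xr
import xarray.ufuncs as xu

da = xr.DataArray(np.linspace(0, np.pi, 5), dims="x")

# Dispatches to np.sin but returns a DataArray with metadata intact;
# the wrapper's __doc__ is the cleaned-up numpy docstring produced below.
xu.sin(da)
```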
""" +import textwrap import warnings as _warnings import numpy as _np @@ -78,10 +79,49 @@ def __call__(self, *args, **kwargs): return res +def _skip_signature(doc, name): + if not isinstance(doc, str): + return doc + + if doc.startswith(name): + signature_end = doc.find("\n\n") + doc = doc[signature_end + 2 :] + + return doc + + +def _remove_unused_reference_labels(doc): + if not isinstance(doc, str): + return doc + + max_references = 5 + for num in range(max_references): + label = f".. [{num}]" + reference = f"[{num}]_" + index = f"{num}. " + + if label not in doc or reference in doc: + continue + + doc = doc.replace(label, index) + + return doc + + +def _dedent(doc): + if not isinstance(doc, str): + return doc + + return textwrap.dedent(doc) + + def _create_op(name): func = _UFuncDispatcher(name) func.__name__ = name doc = getattr(_np, name).__doc__ + + doc = _remove_unused_reference_labels(_skip_signature(_dedent(doc), name)) + func.__doc__ = ( "xarray specific variant of numpy.%s. Handles " "xarray.Dataset, xarray.DataArray, xarray.Variable, " From 220adbc65e0b8c46feddaa6984df4a3a1ce0af6b Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Wed, 20 Nov 2019 01:23:33 +0900 Subject: [PATCH 20/24] sparse option to reindex and unstack (#3542) * Added fill_value for unstack * remove sparse option and fix unintended changes * a bug fix * Added sparse option to unstack and reindex * black * More tests * black * Remove sparse option from reindex * try __array_function__ where * flake8 --- doc/whats-new.rst | 4 ++++ xarray/core/alignment.py | 5 +++++ xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 35 +++++++++++++++++++++++++++++--- xarray/core/variable.py | 38 +++++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 19 ++++++++++++++++++ xarray/tests/test_variable.py | 12 +++++++++++ 7 files changed, 113 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 105d661b5f7..9f5d57d4a72 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`, + :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`, + :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`). + By `Keisuke Fujii `_. - Added the ``max_gap`` kwarg to :py:meth:`DataArray.interpolate_na` and :py:meth:`Dataset.interpolate_na`. This controls the maximum size of the data diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index b820d215d2f..908119f7995 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -466,6 +466,7 @@ def reindex_variables( tolerance: Any = None, copy: bool = True, fill_value: Optional[Any] = dtypes.NA, + sparse: bool = False, ) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: """Conform a dictionary of aligned variables onto a new set of variables, filling in missing values with NaN. @@ -503,6 +504,8 @@ def reindex_variables( the input. In either case, new xarray objects are always returned. 
fill_value : scalar, optional Value to use for newly missing values + sparse: bool, optional + Use a sparse array Returns ------- @@ -571,6 +574,8 @@ def reindex_variables( for name, var in variables.items(): if name not in indexers: + if sparse: + var = var._as_sparse(fill_value=fill_value) key = tuple( slice(None) if d in unchanged_dims else int_indexers.get(d, slice(None)) for d in var.dims diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1205362ad91..c92fcb956b1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1729,6 +1729,7 @@ def unstack( self, dim: Union[Hashable, Sequence[Hashable], None] = None, fill_value: Any = dtypes.NA, + sparse: bool = False, ) -> "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1742,6 +1743,7 @@ def unstack( Dimension(s) over which to unstack. By default unstacks all MultiIndexes. fill_value: value to be filled. By default, np.nan + sparse: use a sparse array if True Returns ------- @@ -1773,7 +1775,7 @@ def unstack( -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim, fill_value) + ds = self._to_temp_dataset().unstack(dim, fill_value, sparse) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5b9663c2453..206f2f55b3c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2286,6 +2286,7 @@ def reindex( the input. In either case, a new xarray object is always returned. fill_value : scalar, optional Value to use for newly missing values + sparse: use a sparse array. By default, False **indexers_kwarg : {dim: indexer, ...}, optional Keyword arguments in the same form as ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2428,6 +2429,29 @@ def reindex( the original and desired indexes. If you do want to fill in the `NaN` values present in the original dataset, use the :py:meth:`~Dataset.fillna()` method.
+ """ + return self._reindex( + indexers, + method, + tolerance, + copy, + fill_value, + sparse=False, + **indexers_kwargs, + ) + + def _reindex( + self, + indexers: Mapping[Hashable, Any] = None, + method: str = None, + tolerance: Number = None, + copy: bool = True, + fill_value: Any = dtypes.NA, + sparse: bool = False, + **indexers_kwargs: Any, + ) -> "Dataset": + """ + Same as reindex, but also supports the sparse option """ indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") @@ -2444,6 +2468,7 @@ def reindex( tolerance, copy=copy, fill_value=fill_value, + sparse=sparse, ) coord_names = set(self._coord_names) coord_names.update(indexers) @@ -3327,7 +3352,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value, sparse) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3336,7 +3361,9 @@ def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False, fill_value=fill_value) + obj = self._reindex( + {dim: full_idx}, copy=False, fill_value=fill_value, sparse=sparse + ) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3366,6 +3393,7 @@ def unstack( self, dim: Union[Hashable, Iterable[Hashable]] = None, fill_value: Any = dtypes.NA, + sparse: bool = False, ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -3379,6 +3407,7 @@ def unstack( Dimension(s) over which to unstack. By default unstacks all MultiIndexes. fill_value: value to be filled. By default, np.nan + sparse: use a sparse array if True Returns ------- @@ -3416,7 +3445,7 @@ def unstack( result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim, fill_value) + result = result._unstack_once(dim, fill_value, sparse) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e630dc4b457..55e8f64d56c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -993,6 +993,36 @@ def chunk(self, chunks=None, name=None, lock=False): return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) + def _as_sparse(self, sparse_format=_default, fill_value=dtypes.NA): + """ + Use a sparse array as backend. + """ + import sparse + + # TODO what to do if dask-backed? + if fill_value is dtypes.NA: + dtype, fill_value = dtypes.maybe_promote(self.dtype) + else: + dtype = dtypes.result_type(self.dtype, fill_value) + + if sparse_format is _default: + sparse_format = "coo" + try: + as_sparse = getattr(sparse, "as_{}".format(sparse_format.lower())) + except AttributeError: + raise ValueError("{} is not a valid sparse format".format(sparse_format)) + + data = as_sparse(self.data.astype(dtype), fill_value=fill_value) + return self._replace(data=data) + + def _to_dense(self): + """ + Change backend from sparse to np.ndarray + """ + if hasattr(self._data, "todense"): + return self._replace(data=self._data.todense()) + return self.copy(deep=False) + def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, @@ -2021,6 +2051,14 @@ def chunk(self, chunks=None, name=None, lock=False): # Dummy - do not chunk. This method is invoked e.g.
by Dataset.chunk() return self.copy(deep=False) + def _as_sparse(self, sparse_format=_default, fill_value=_default): + # Dummy + return self.copy(deep=False) + + def _to_dense(self): + # Dummy + return self.copy(deep=False) + def _finalize_indexing_result(self, dims, data): if getattr(data, "ndim", 0) != 1: # returns Variable rather than IndexVariable if multi-dimensional diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index de074da541f..e8fe768b783 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2811,6 +2811,25 @@ def test_unstack_fill_value(self): expected = ds["var"].unstack("index").fillna(-1).astype(np.int) assert actual.equals(expected) + @requires_sparse + def test_unstack_sparse(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", sparse=True) + expected = ds.unstack("index") + assert actual["var"].variable._to_dense().equals(expected["var"].variable) + assert actual["var"].data.density < 1.0 + + actual = ds["var"].unstack("index", sparse=True) + expected = ds["var"].unstack("index") + assert actual.variable._to_dense().equals(expected.variable) + assert actual.data.density < 1.0 + def test_stack_unstack_fast(self): ds = Dataset( { diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index d92a68729b5..ee8d54e567e 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -33,6 +33,7 @@ assert_identical, raises_regex, requires_dask, + requires_sparse, source_ndarray, ) @@ -1862,6 +1863,17 @@ def test_getitem_with_mask_nd_indexer(self): ) +@requires_sparse +class TestVariableWithSparse: + # TODO inherit VariableSubclassobjects to cover more tests + + def test_as_sparse(self): + data = np.arange(12).reshape(3, 4) + var = Variable(("x", "y"), data)._as_sparse(fill_value=-1) + actual = var._to_dense() + assert_identical(var, actual) + + class TestIndexVariable(VariableSubclassobjects): cls = staticmethod(IndexVariable) From 0ef9aa3abae55833e4431d690bc55c5b5a44911b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 19 Nov 2019 17:21:48 -0500 Subject: [PATCH 21/24] 0.14.1 whatsnew (#3547) --- doc/whats-new.rst | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9f5d57d4a72..f47aad9b5a8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,7 +15,7 @@ What's New .. _whats-new.0.14.1: -v0.14.1 (unreleased) +v0.14.1 (19 Nov 2019) -------------------- Breaking changes @@ -41,23 +41,20 @@ New Features :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`, :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`). By `Keisuke Fujii `_. - -- Added the ``max_gap`` kwarg to :py:meth:`DataArray.interpolate_na` and - :py:meth:`Dataset.interpolate_na`. This controls the maximum size of the data - Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and :py:meth:`Dataset.unstack` (:issue:`3518`, :pull:`3541`). By `Keisuke Fujii `_. - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. 
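Outside the test suite, the sparse option from this patch would be used roughly like this (a sketch; requires the ``sparse`` package, and ``_to_dense``/``density`` appear in the tests above):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"var": ("z", np.arange(4.0))},
    coords={"x": ("z", [0, 0, 1, 2]), "y": ("z", ["a", "b", "a", "b"])},
).set_index(z=["x", "y"])

# Only 4 of the 3 x 2 possible (x, y) pairs exist, so the sparse result
# keeps the missing cells "virtual" instead of materialising NaNs.
sparse_ds = ds.unstack("z", sparse=True)
sparse_ds["var"].data.density                      # < 1.0
dense_var = sparse_ds["var"].variable._to_dense()  # round-trip, as in the test
```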
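The ``max_gap`` entry above can likewise be illustrated with a small sketch (hypothetical data; gap length is measured in coordinate units, as the docstrings earlier in this series state):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    [np.nan, 1.0, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0],
    dims="x",
    coords={"x": np.arange(8)},
)

# The gap at x=2 has length 3 - 1 = 2, so it is filled (to 2.0); the gap
# at x=4..6 has length 7 - 3 = 4 > max_gap and is left as NaN, as is the
# leading NaN, which has no left anchor for linear interpolation.
da.interpolate_na(dim="x", method="linear", max_gap=2)
```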
-- :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. +- Added :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` & :py:meth:`DataArray.drop` methods remain as a backward compatible option for dropping either labels or variables, but using the more specific methods is encouraged. (:pull:`3475`) By `Maximilian Roos `_ -- :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` have been added for +- Added :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` for mapping / applying a function over each item in the collection, reflecting the widely used and least surprising name for this operation. The existing ``apply`` methods remain for backward compatibility, though using the ``map`` @@ -131,7 +128,7 @@ Documentation - Fix leap year condition in `monthly means example `_. By `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and - :py:meth:`Dataset.resample` — explicitly state that a + :py:meth:`Dataset.resample`, explicitly stating that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin `_. - Update the :ref:`terminology` page to address multidimensional coordinates. (:pull:`3410`) From 7466be623fbb4bbb5efc389d31436bd38e53d198 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 19 Nov 2019 17:49:32 -0500 Subject: [PATCH 22/24] Revert to dev version --- doc/whats-new.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f47aad9b5a8..de834512e36 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,6 +15,31 @@ What's New .. _whats-new.0.14.1: + +v0.15.0 (unreleased) +-------------------- + +Breaking changes +~~~~~~~~~~~~~~~~ + + +New Features +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + + v0.14.1 (19 Nov 2019) -------------------- From 6b70107ab3063187b663290538c0d5a4107dab6e Mon Sep 17 00:00:00 2001 From: crusaderky Date: Wed, 20 Nov 2019 09:47:56 +0000 Subject: [PATCH 23/24] Clarify conda environments for new contributors (#3551) --- doc/contributing.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index 028ec47e014..3cd0b3e8868 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -151,7 +151,9 @@ We'll now kick off a two-step process: .. code-block:: none # Create and activate the build environment - conda env create -f ci/requirements/py36.yml + # This is for Linux and MacOS. On Windows, use py37-windows.yml instead. 
+ conda env create -f ci/requirements/py37.yml + conda activate xarray-tests # or with older versions of Anaconda: From 8d09879748d2e201ac6de7345e71fa7320801131 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 21 Nov 2019 09:45:20 -0500 Subject: [PATCH 24/24] Tweaks to release instructions (#3555) * tweaks to release instructions * Update HOW_TO_RELEASE.md Co-Authored-By: keewis * no need for --reverse either * add cool script as option from @keewis * whatsnew reference * tweak --- HOW_TO_RELEASE => HOW_TO_RELEASE.md | 51 ++++++++++++++++++++++------- doc/whats-new.rst | 2 +- 2 files changed, 41 insertions(+), 12 deletions(-) rename HOW_TO_RELEASE => HOW_TO_RELEASE.md (74%) diff --git a/HOW_TO_RELEASE b/HOW_TO_RELEASE.md similarity index 74% rename from HOW_TO_RELEASE rename to HOW_TO_RELEASE.md index 5bf9bf38ded..cdeb0e19a3e 100644 --- a/HOW_TO_RELEASE +++ b/HOW_TO_RELEASE.md @@ -1,9 +1,11 @@ -How to issue an xarray release in 15 easy steps +How to issue an xarray release in 14 easy steps Time required: about an hour. 1. Ensure your master branch is synced to upstream: - git pull upstream master + ``` + git pull upstream master + ``` 2. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and consider adding a brief summary note describing the release at the top. @@ -12,37 +14,53 @@ Time required: about an hour. - Function/method references should include links to the API docs. - Sometimes notes get added in the wrong section of whats-new, typically due to a bad merge. Check for these before a release by using git diff, - e.g., ``git diff v0.X.Y whats-new.rst`` where 0.X.Y is the previous + e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous release. 3. If you have any doubts, run the full test suite one final time! - py.test + ``` + pytest + ``` 4. On the master branch, commit the release in git: + ``` git commit -a -m 'Release v0.X.Y' + ``` 5. Tag the release: + ``` git tag -a v0.X.Y -m 'v0.X.Y' + ``` 6. Build source and binary wheels for pypi: + ``` git clean -xdf # this deletes all uncommited changes! python setup.py bdist_wheel sdist + ``` 7. Use twine to register and upload the release on pypi. Be careful, you can't take this back! + ``` twine upload dist/xarray-0.X.Y* + ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. 8. Push your changes to master: + ``` git push upstream master git push upstream --tags + ``` 9. Update the stable branch (used by ReadTheDocs) and switch back to master: + ``` git checkout stable git rebase master git push upstream stable git checkout master - It's OK to force push to 'stable' if necessary. - We also update the stable branch with `git cherrypick` for documentation - only fixes that apply the current released version. + ``` + It's OK to force push to 'stable' if necessary. (We also update the stable + branch with `git cherrypick` for documentation only fixes that apply the + current released version.) 10. Add a section for the next release (v.X.(Y+1)) to doc/whats-new.rst. 11. Commit your changes and push to master again: - git commit -a -m 'Revert to dev version' + ``` + git commit -a -m 'New whatsnew section' git push upstream master + ``` You're done pushing to master! 12. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but @@ -53,11 +71,22 @@ Time required: about an hour. 14. 
Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com, xarray@googlegroups.com, - numpy-discussion@scipy.org, scipy-user@scipy.org, - pyaos@lists.johnny-lin.com + - pydata@googlegroups.com + - xarray@googlegroups.com + - numpy-discussion@scipy.org + - scipy-user@scipy.org + - pyaos@lists.johnny-lin.com + Google search will turn up examples of prior release announcements (look for "ANN xarray"). + You can get a list of contributors with: + ``` + git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format="%aN" | sort -u + ``` + or by replacing `v0.X.Y` with the _previous_ release in: + ``` + git log v0.X.Y.. --format="%aN" | sort -u + ``` Note on version numbering: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index de834512e36..91eed098522 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,7 +13,7 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.14.1: +.. _whats-new.0.15.0: v0.15.0 (unreleased)