From 637fb879ac1b24d0e9676a4d49a93d894c65cad0 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 10:50:50 -0500
Subject: [PATCH 01/18] Use integers when possible to encode/decode times

---
 xarray/coding/times.py            | 29 ++++++++++++----
 xarray/tests/test_coding_times.py | 55 ++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 59f8b89743a..99a5912727a 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -26,6 +26,7 @@
 _STANDARD_CALENDARS = {"standard", "gregorian", "proleptic_gregorian"}
 
 _NS_PER_TIME_DELTA = {
+    "ns": 1,
     "us": int(1e3),
     "ms": int(1e6),
     "s": int(1e9),
@@ -44,6 +45,7 @@ def _netcdf_to_numpy_timeunit(units):
     if not units.endswith("s"):
         units = "%ss" % units
     return {
+        "nanoseconds": "ns",
         "microseconds": "us",
         "milliseconds": "ms",
         "seconds": "s",
@@ -162,10 +164,9 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
     # Cast input dates to integers of nanoseconds because `pd.to_datetime`
     # works much faster when dealing with integers
     # make _NS_PER_TIME_DELTA an array to ensure type upcasting
-    flat_num_dates_ns_int = (
-        flat_num_dates.astype(np.float64) * _NS_PER_TIME_DELTA[delta]
-    ).astype(np.int64)
-
+    flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
+        np.int64
+    )
     return (pd.to_timedelta(flat_num_dates_ns_int, "ns") + ref_date).values
 
 
@@ -252,7 +253,15 @@ def decode_cf_timedelta(num_timedeltas, units):
 
 
 def _infer_time_units_from_diff(unique_timedeltas):
-    for time_unit in ["days", "hours", "minutes", "seconds"]:
+    for time_unit in [
+        "days",
+        "hours",
+        "minutes",
+        "seconds",
+        "milliseconds",
+        "microseconds",
+        "nanoseconds",
+    ]:
         delta_ns = _NS_PER_TIME_DELTA[_netcdf_to_numpy_timeunit(time_unit)]
         unit_delta = np.timedelta64(delta_ns, "ns")
         diffs = unique_timedeltas / unit_delta
@@ -416,7 +425,15 @@ def encode_cf_datetime(dates, units=None, calendar=None):
         # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
         # an OverflowError is raised if the ref_date is too far away from
         # dates to be encoded (GH 2272).
-        num = (pd.DatetimeIndex(dates.ravel()) - ref_date) / time_delta
+        dates_as_index = pd.DatetimeIndex(dates.ravel())
+        time_deltas = dates_as_index - ref_date
+
+        # Use floor division if the time_delta evenly divides all differences
+        # to preserve integer dtype if possible.
+        if np.all(time_deltas % time_delta == np.timedelta64(0, delta_units)):
+            num = time_deltas // time_delta
+        else:
+            num = time_deltas / time_delta
         num = num.values.reshape(dates.shape)
 
     except (OutOfBoundsDatetime, OverflowError):
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index d35cad019b7..56478498c37 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -6,7 +6,7 @@
 import pytest
 from pandas.errors import OutOfBoundsDatetime
 
-from xarray import DataArray, Dataset, Variable, coding, decode_cf
+from xarray import DataArray, Dataset, Variable, coding, conventions, decode_cf
 from xarray.coding.times import (
     cftime_to_nptime,
     decode_cf_datetime,
@@ -958,3 +958,56 @@ def test_decode_ambiguous_time_warns(calendar):
         assert not record
 
     np.testing.assert_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("freq", "units"),
+    [
+        ("D", "days"),
+        ("D", "hours"),
+        ("D", "minutes"),
+        ("D", "seconds"),
+        ("D", "milliseconds"),
+        ("D", "microseconds"),
+        ("D", "nanoseconds"),
+        ("H", "hours"),
+        ("H", "minutes"),
+        ("H", "seconds"),
+        ("H", "milliseconds"),
+        ("H", "microseconds"),
+        ("H", "nanoseconds"),
+        ("T", "minutes"),
+        ("T", "seconds"),
+        ("T", "milliseconds"),
+        ("T", "microseconds"),
+        ("T", "nanoseconds"),
+        ("S", "seconds"),
+        ("S", "milliseconds"),
+        ("S", "microseconds"),
+        ("S", "nanoseconds"),
+        ("L", "milliseconds"),
+        ("L", "microseconds"),
+        ("L", "nanoseconds"),
+        ("U", "microseconds"),
+        ("U", "nanoseconds"),
+        ("N", "nanoseconds"),
+    ],
+)
+def test_encode_cf_datetime_preserves_integer_units(freq, units):
+    times = pd.date_range("2000", periods=3, freq=freq)
+    cf_units = f"{units} since 2000-01-01"
+    encoded, _, _ = coding.times.encode_cf_datetime(times, cf_units)
+    assert encoded.dtype == np.int64
+
+
+@pytest.mark.parametrize(
+    "encoding",
+    [{"units": "nanoseconds since 1900-01-01"}, {}],
+    ids=["explicit-units", "no-units"],
+)
+def test_nanosecond_resolution_roundtrip(encoding):
+    times = pd.date_range("2000", periods=12, freq="337N")
+    variable = Variable(["time"], times, encoding=encoding)
+    encoded = conventions.encode_cf_variable(variable)
+    decoded = conventions.decode_cf_variable("time", encoded)
+    assert_equal(variable, decoded)

From e2efadbe3b5e2f130123d0b12109abd71aa461cc Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 14:39:06 -0500
Subject: [PATCH 02/18] Improve tests

---
 xarray/tests/test_coding_times.py | 73 ++++++++++++++-----------------
 1 file changed, 34 insertions(+), 39 deletions(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 56478498c37..b31f4f1b27b 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -960,44 +960,39 @@ def test_decode_ambiguous_time_warns(calendar):
     np.testing.assert_array_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    ("freq", "units"),
-    [
-        ("D", "days"),
-        ("D", "hours"),
-        ("D", "minutes"),
-        ("D", "seconds"),
-        ("D", "milliseconds"),
-        ("D", "microseconds"),
-        ("D", "nanoseconds"),
-        ("H", "hours"),
-        ("H", "minutes"),
-        ("H", "seconds"),
-        ("H", "milliseconds"),
-        ("H", "microseconds"),
-        ("H", "nanoseconds"),
-        ("T", "minutes"),
-        ("T", "seconds"),
-        ("T", "milliseconds"),
-        ("T", "microseconds"),
-        ("T", "nanoseconds"),
-        ("S", "seconds"),
-        ("S", "milliseconds"),
-        ("S", "microseconds"),
-        ("S", "nanoseconds"),
-        ("L", "milliseconds"),
-        ("L", "microseconds"),
-        ("L", "nanoseconds"),
-        ("U", "microseconds"),
-        ("U", "nanoseconds"),
-        ("N", "nanoseconds"),
-    ],
-)
-def test_encode_cf_datetime_preserves_integer_units(freq, units):
-    times = pd.date_range("2000", periods=3, freq=freq)
-    cf_units = f"{units} since 2000-01-01"
-    encoded, _, _ = coding.times.encode_cf_datetime(times, cf_units)
-    assert encoded.dtype == np.int64
+ENCODING_UNITS = [
+    "days",
+    "hours",
+    "minutes",
+    "seconds",
+    "milliseconds",
+    "microseconds",
+    "nanoseconds",
+]
+NUMPY_FREQUENCIES = [
+    np.timedelta64(1, "D"),
+    np.timedelta64(1, "h"),
+    np.timedelta64(1, "m"),
+    np.timedelta64(1, "s"),
+    np.timedelta64(1, "ms"),
+    np.timedelta64(1, "us"),
+    np.timedelta64(1, "ns"),
+]
+
+
+@pytest.mark.parametrize("encoding_units", ENCODING_UNITS)
+@pytest.mark.parametrize("data_freq", NUMPY_FREQUENCIES, ids=lambda x: f"{x!r}")
+def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq):
+    times = pd.date_range("2000", periods=3, freq=pd.to_timedelta(data_freq))
+    units = f"{encoding_units} since 2000-01-01"
+    encoded, _, _ = coding.times.encode_cf_datetime(times, units)
+
+    numpy_timeunit = coding.times._netcdf_to_numpy_timeunit(encoding_units)
+    cf_units_as_timedelta = np.timedelta64(1, numpy_timeunit)
+    if data_freq >= cf_units_as_timedelta:
+        assert encoded.dtype == np.int64
+    else:
+        assert encoded.dtype == np.float64
 
 
 @pytest.mark.parametrize(
@@ -1005,7 +1000,7 @@ def test_encode_cf_datetime_preserves_integer_units(freq, units):
     [{"units": "nanoseconds since 1900-01-01"}, {}],
     ids=["explicit-units", "no-units"],
 )
-def test_nanosecond_resolution_roundtrip(encoding):
+def test_encode_decode_roundtrip(encoding):
     times = pd.date_range("2000", periods=12, freq="337N")
     variable = Variable(["time"], times, encoding=encoding)
     encoded = conventions.encode_cf_variable(variable)

From 6a14aa09b146007506509ef3ef08ca7a15afe1c4 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 14:51:02 -0500
Subject: [PATCH 03/18] Further improvements to tests

---
 xarray/tests/test_coding_times.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index b31f4f1b27b..548caee3aed 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -995,14 +995,11 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq)
         assert encoded.dtype == np.float64
 
 
-@pytest.mark.parametrize(
-    "encoding",
-    [{"units": "nanoseconds since 1900-01-01"}, {}],
-    ids=["explicit-units", "no-units"],
-)
-def test_encode_decode_roundtrip(encoding):
-    times = pd.date_range("2000", periods=12, freq="337N")
-    variable = Variable(["time"], times, encoding=encoding)
+@pytest.mark.parametrize("freq", ["N", "U", "L", "S", "T", "H", "D"])
+def test_encode_decode_roundtrip(freq):
+    initial_time = pd.date_range("1678-01-01", periods=1)
+    times = initial_time.append(pd.date_range("1968", periods=12, freq=freq))
+    variable = Variable(["time"], times)
     encoded = conventions.encode_cf_variable(variable)
     decoded = conventions.decode_cf_variable("time", encoded)
     assert_equal(variable, decoded)

From 9b8cfdaf129d000611a91cfc29a28dc3f53c8a8e Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 15:31:25 -0500
Subject: [PATCH 04/18] Remove optimization in favor of maximum correctness

---
 xarray/coding/times.py            | 20 +++++---------------
 xarray/tests/test_coding_times.py | 31 +++++++++++++++++++++----------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 99a5912727a..380f286c261 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -153,21 +153,11 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
         # strings, in which case we fall back to using cftime
         raise OutOfBoundsDatetime
 
-    # fixes: https://github.com/pydata/pandas/issues/14068
-    # these lines check if the the lowest or the highest value in dates
-    # cause an OutOfBoundsDatetime (Overflow) error
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning)
-        pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
-        pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
-
-    # Cast input dates to integers of nanoseconds because `pd.to_datetime`
-    # works much faster when dealing with integers
-    # make _NS_PER_TIME_DELTA an array to ensure type upcasting
-    flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
-        np.int64
-    )
-    return (pd.to_timedelta(flat_num_dates_ns_int, "ns") + ref_date).values
+    # Use pd.to_timedelta to safely cast integer or float values to timedeltas,
+    # and add those to a Timestamp to safely produce a DatetimeIndex.  This
+    # ensures that we do not encounter integer overflow at any point in the
+    # process without raising an OutOfBoundsDatetime or OverflowError.
+    return (pd.to_timedelta(flat_num_dates, delta) + ref_date).values
 
 
 def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None):
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 548caee3aed..d1967c72ea0 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -99,10 +99,12 @@ def test_cf_datetime(num_dates, units, calendar):
     if min_y >= 1678 and max_y < 2262:
         expected = cftime_to_nptime(expected)
 
+    print(expected)
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", "Unable to decode time axis")
         actual = coding.times.decode_cf_datetime(num_dates, units, calendar)
 
+    print(actual)
     abs_diff = np.asarray(abs(actual - expected)).ravel()
     abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy()
 
@@ -479,27 +481,36 @@ def test_decoded_cf_datetime_array_2d():
     assert_array_equal(np.asarray(result), expected)
 
 
+PANDAS_FREQUENCIES_TO_ENCODING_UNITS = {
+    "N": "nanoseconds",
+    "U": "microseconds",
+    "L": "milliseconds",
+    "S": "seconds",
+    "T": "minutes",
+    "H": "hours",
+    "D": "days"
+}
+
+
+@pytest.mark.parametrize(("freq", "units"), PANDAS_FREQUENCIES_TO_ENCODING_UNITS.items())
+def test_infer_datetime_units(freq, units):
+    dates = pd.date_range("2000", periods=2, freq=freq)
+    expected = f"{units} since 2000-01-01 00:00:00"
+    assert expected == coding.times.infer_datetime_units(dates)
+
+
 @pytest.mark.parametrize(
     ["dates", "expected"],
     [
-        (pd.date_range("1900-01-01", periods=5), "days since 1900-01-01 00:00:00"),
-        (
-            pd.date_range("1900-01-01 12:00:00", freq="H", periods=2),
-            "hours since 1900-01-01 12:00:00",
-        ),
         (
             pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"]),
             "days since 1900-01-01 00:00:00",
         ),
-        (
-            pd.to_datetime(["1900-01-01", "1900-01-02T00:00:00.005"]),
-            "seconds since 1900-01-01 00:00:00",
-        ),
         (pd.to_datetime(["NaT", "1900-01-01"]), "days since 1900-01-01 00:00:00"),
         (pd.to_datetime(["NaT"]), "days since 1970-01-01 00:00:00"),
     ],
 )
-def test_infer_datetime_units(dates, expected):
+def test_infer_datetime_units_with_NaT(dates, expected):
     assert expected == coding.times.infer_datetime_units(dates)
 
 

From ce4bfb587aaef3e096b94b6dc3062fdc283aa24a Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 15:39:01 -0500
Subject: [PATCH 05/18] Remove print statements

---
 xarray/tests/test_coding_times.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index d1967c72ea0..a74d3f57dbd 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -99,12 +99,10 @@ def test_cf_datetime(num_dates, units, calendar):
     if min_y >= 1678 and max_y < 2262:
         expected = cftime_to_nptime(expected)
 
-    print(expected)
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", "Unable to decode time axis")
         actual = coding.times.decode_cf_datetime(num_dates, units, calendar)
 
-    print(actual)
     abs_diff = np.asarray(abs(actual - expected)).ravel()
     abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy()
 

From 032b577f033b017a3cfd8ea5b1cd3a4799cdaf6d Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 16:00:41 -0500
Subject: [PATCH 06/18] Restore optimization

---
 xarray/coding/times.py            | 18 +++++++++++++++---
 xarray/tests/test_coding_times.py |  6 ++++--
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 380f286c261..052ebed69a6 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -153,11 +153,23 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
         # strings, in which case we fall back to using cftime
         raise OutOfBoundsDatetime
 
-    # Use pd.to_timedelta to safely cast integer or float values to timedeltas,
+    # To avoid integer overflow when converting to nanosecond units for integer
+    # dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64
+    # (GH #2002).
+    if flat_num_dates.dtype.kind == "i":
+        flat_num_dates = flat_num_dates.astype(np.int64)
+
+    # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
+    # works much faster when dealing with integers (GH #1399).
+    flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
+        np.int64
+    )
+
+    # Use pd.to_timedelta to safely cast integer values to timedeltas,
     # and add those to a Timestamp to safely produce a DatetimeIndex.  This
     # ensures that we do not encounter integer overflow at any point in the
-    # process without raising an OutOfBoundsDatetime or OverflowError.
-    return (pd.to_timedelta(flat_num_dates, delta) + ref_date).values
+    # process without raising OutOfBoundsDatetime.
+    return (pd.to_timedelta(flat_num_dates_ns_int, "ns") + ref_date).values
 
 
 def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None):
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index a74d3f57dbd..7110bb48bb2 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -486,11 +486,13 @@ def test_decoded_cf_datetime_array_2d():
     "S": "seconds",
     "T": "minutes",
     "H": "hours",
-    "D": "days"
+    "D": "days",
 }
 
 
-@pytest.mark.parametrize(("freq", "units"), PANDAS_FREQUENCIES_TO_ENCODING_UNITS.items())
+@pytest.mark.parametrize(
+    ("freq", "units"), PANDAS_FREQUENCIES_TO_ENCODING_UNITS.items()
+)
 def test_infer_datetime_units(freq, units):
     dates = pd.date_range("2000", periods=2, freq=freq)
     expected = f"{units} since 2000-01-01 00:00:00"

From f3a870f11189f91a5387e4ecaba3058226685c91 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 16:29:16 -0500
Subject: [PATCH 07/18] Add a what's new entry

---
 doc/whats-new.rst | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 433e0185a91..c7b6c70b8cd 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -32,7 +32,14 @@ Bug fixes
 ~~~~~~~~~
 
 - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`).
-
+- By default, when possible, xarray will now always use values of type ``int64`` when encoding
+  and decoding ``numpy.datetime64[ns]`` datetimes.  This ensures that maximum
+  precision and accuracy is maintained in the round-tripping process
+  (:issue:`4045`). It also enables encoding and decoding standard calendar
+  dates with time units of nanoseconds (:pull:`4400`). By `Spencer Clark
+  <https://github.com/spencerkclark>`_ and `Mark Harfouche <http://github.com/hmaarrfk>`_.
+  
+  
 Documentation
 ~~~~~~~~~~~~~
 - start a list of external I/O integrating with ``xarray`` (:issue:`683`, :pull:`4566`).

From 2740483e4da52995eb7839bbd91c74c84e1e9e73 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 16:39:34 -0500
Subject: [PATCH 08/18] Add test for decoding timedeltas with nanosecond units
 too

---
 xarray/coding/times.py            | 10 +++++++++-
 xarray/tests/test_coding_times.py |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 052ebed69a6..e01815a500c 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -36,7 +36,15 @@
 }
 
 TIME_UNITS = frozenset(
-    ["days", "hours", "minutes", "seconds", "milliseconds", "microseconds"]
+    [
+        "days",
+        "hours",
+        "minutes",
+        "seconds",
+        "milliseconds",
+        "microseconds",
+        "nanoseconds",
+    ]
 )
 
 
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 7110bb48bb2..092268c9d7b 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -546,6 +546,7 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected):
         ("1h", "hours", np.int64(1)),
         ("1ms", "milliseconds", np.int64(1)),
         ("1us", "microseconds", np.int64(1)),
+        ("1ns", "nanoseconds", np.int64(1)),
         (["NaT", "0s", "1s"], None, [np.nan, 0, 1]),
         (["30m", "60m"], "hours", [0.5, 1.0]),
         ("NaT", "days", np.nan),

From 0c2b41b5789ffadb9d80b894a7c8ab3778ecaaec Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 16:48:21 -0500
Subject: [PATCH 09/18] Some minor cleanups

---
 doc/whats-new.rst                 | 4 ++--
 xarray/tests/test_coding_times.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index c7b6c70b8cd..ceeb4d16c56 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,8 +34,8 @@ Bug fixes
 - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`).
 - By default, when possible, xarray will now always use values of type ``int64`` when encoding
   and decoding ``numpy.datetime64[ns]`` datetimes.  This ensures that maximum
-  precision and accuracy is maintained in the round-tripping process
-  (:issue:`4045`). It also enables encoding and decoding standard calendar
+  precision and accuracy are maintained in the round-tripping process
+  (:issue:`4045`, :pull:`4684`). It also enables encoding and decoding standard calendar
   dates with time units of nanoseconds (:pull:`4400`). By `Spencer Clark
   <https://github.com/spencerkclark>`_ and `Mark Harfouche <http://github.com/hmaarrfk>`_.
   
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 092268c9d7b..85fa4fd8a70 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1000,8 +1000,8 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq)
     encoded, _, _ = coding.times.encode_cf_datetime(times, units)
 
     numpy_timeunit = coding.times._netcdf_to_numpy_timeunit(encoding_units)
-    cf_units_as_timedelta = np.timedelta64(1, numpy_timeunit)
-    if data_freq >= cf_units_as_timedelta:
+    encoding_units_as_timedelta = np.timedelta64(1, numpy_timeunit)
+    if data_freq >= encoding_units_as_timedelta:
         assert encoded.dtype == np.int64
     else:
         assert encoded.dtype == np.float64

From db487cef951e80f131803528c1eedacd352eef63 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 17:08:23 -0500
Subject: [PATCH 10/18] Add comment to motivate new test

---
 xarray/coding/times.py            | 2 +-
 xarray/tests/test_coding_times.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index e01815a500c..950245c077a 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -438,7 +438,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
         dates_as_index = pd.DatetimeIndex(dates.ravel())
         time_deltas = dates_as_index - ref_date
 
-        # Use floor division if the time_delta evenly divides all differences
+        # Use floor division if time_delta evenly divides all differences
         # to preserve integer dtype if possible.
         if np.all(time_deltas % time_delta == np.timedelta64(0, delta_units)):
             num = time_deltas // time_delta
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 85fa4fd8a70..93ac9b12521 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1009,8 +1009,10 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq)
 
 @pytest.mark.parametrize("freq", ["N", "U", "L", "S", "T", "H", "D"])
 def test_encode_decode_roundtrip(freq):
+    # See GH #4045. Prior to #4684 this test would fail for frequencies of "S",
+    # "L", "U", and "N".
     initial_time = pd.date_range("1678-01-01", periods=1)
-    times = initial_time.append(pd.date_range("1968", periods=12, freq=freq))
+    times = initial_time.append(pd.date_range("1968", periods=2, freq=freq))
     variable = Variable(["time"], times)
     encoded = conventions.encode_cf_variable(variable)
     decoded = conventions.decode_cf_variable("time", encoded)

From 480388224b5d45026a265f99f6893b1e80cb72c8 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 19:49:51 -0500
Subject: [PATCH 11/18] Add some print statements to try and debug things on
 Windows

---
 xarray/coding/times.py            | 6 ++++++
 xarray/tests/test_coding_times.py | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 950245c077a..4af034f8f55 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -164,14 +164,20 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
     # To avoid integer overflow when converting to nanosecond units for integer
     # dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64
     # (GH #2002).
+    print(f"flat_num_dates dtype prior to casting {flat_num_dates.dtype}")
     if flat_num_dates.dtype.kind == "i":
         flat_num_dates = flat_num_dates.astype(np.int64)
+    print(f"flat_num_dates dtype after casting {flat_num_dates.dtype}")
 
     # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
     # works much faster when dealing with integers (GH #1399).
+    print("flat_num_dates_ns_int prior to casting")
+    print(flat_num_dates * _NS_PER_TIME_DELTA[delta])
     flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
         np.int64
     )
+    print("flat_num_dates_ns_int after casting")
+    print(flat_num_dates_ns_int)
 
     # Use pd.to_timedelta to safely cast integer values to timedeltas,
     # and add those to a Timestamp to safely produce a DatetimeIndex.  This
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 93ac9b12521..e082e993081 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1016,4 +1016,6 @@ def test_encode_decode_roundtrip(freq):
     variable = Variable(["time"], times)
     encoded = conventions.encode_cf_variable(variable)
     decoded = conventions.decode_cf_variable("time", encoded)
+    print(encoded)
+    print(decoded)
     assert_equal(variable, decoded)

From ce6d072616a1af878224ae7ac1947ccfeb725d5e Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 20:57:25 -0500
Subject: [PATCH 12/18] xfail round-trip test on Windows; remove print
 statements

---
 xarray/coding/times.py            | 6 ------
 xarray/tests/test_coding_times.py | 6 ++++--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 4af034f8f55..950245c077a 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -164,20 +164,14 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
     # To avoid integer overflow when converting to nanosecond units for integer
     # dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64
     # (GH #2002).
-    print(f"flat_num_dates dtype prior to casting {flat_num_dates.dtype}")
     if flat_num_dates.dtype.kind == "i":
         flat_num_dates = flat_num_dates.astype(np.int64)
-    print(f"flat_num_dates dtype after casting {flat_num_dates.dtype}")
 
     # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
     # works much faster when dealing with integers (GH #1399).
-    print("flat_num_dates_ns_int prior to casting")
-    print(flat_num_dates * _NS_PER_TIME_DELTA[delta])
     flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
         np.int64
     )
-    print("flat_num_dates_ns_int after casting")
-    print(flat_num_dates_ns_int)
 
     # Use pd.to_timedelta to safely cast integer values to timedeltas,
     # and add those to a Timestamp to safely produce a DatetimeIndex.  This
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index e082e993081..0482bdfe8e8 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -16,6 +16,7 @@
 from xarray.coding.variables import SerializationWarning
 from xarray.conventions import _update_bounds_attributes, cf_encoder
 from xarray.core.common import contains_cftime_datetimes
+from xarray.tests.test_backends import ON_WINDOWS
 from xarray.testing import assert_equal
 
 from . import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask
@@ -1008,6 +1009,9 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq)
 
 
 @pytest.mark.parametrize("freq", ["N", "U", "L", "S", "T", "H", "D"])
+@pytest.mark.xfail(
+    ON_WINDOWS, reason="We do not expect exact round-tripping on Windows"
+)
 def test_encode_decode_roundtrip(freq):
     # See GH #4045. Prior to #4684 this test would fail for frequencies of "S",
     # "L", "U", and "N".
@@ -1016,6 +1020,4 @@ def test_encode_decode_roundtrip(freq):
     variable = Variable(["time"], times)
     encoded = conventions.encode_cf_variable(variable)
     decoded = conventions.decode_cf_variable("time", encoded)
-    print(encoded)
-    print(decoded)
     assert_equal(variable, decoded)

From 41d24f18760e91ac99c6f7c15b0f168ac62473f9 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sat, 12 Dec 2020 21:01:46 -0500
Subject: [PATCH 13/18] Don't xfail Windows tests for now; we should figure why
 they fail

---
 xarray/tests/test_coding_times.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 0482bdfe8e8..93ac9b12521 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -16,7 +16,6 @@
 from xarray.coding.variables import SerializationWarning
 from xarray.conventions import _update_bounds_attributes, cf_encoder
 from xarray.core.common import contains_cftime_datetimes
-from xarray.tests.test_backends import ON_WINDOWS
 from xarray.testing import assert_equal
 
 from . import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask
@@ -1009,9 +1008,6 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq)
 
 
 @pytest.mark.parametrize("freq", ["N", "U", "L", "S", "T", "H", "D"])
-@pytest.mark.xfail(
-    ON_WINDOWS, reason="We do not expect exact round-tripping on Windows"
-)
 def test_encode_decode_roundtrip(freq):
     # See GH #4045. Prior to #4684 this test would fail for frequencies of "S",
     # "L", "U", and "N".

From 57814f77df2bc1431233b05ee41e4fb681c4662c Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sun, 13 Dec 2020 15:17:05 -0500
Subject: [PATCH 14/18] Fix things on Windows

---
 xarray/coding/times.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 950245c077a..8ba1ef13d7e 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -274,8 +274,7 @@ def _infer_time_units_from_diff(unique_timedeltas):
     ]:
         delta_ns = _NS_PER_TIME_DELTA[_netcdf_to_numpy_timeunit(time_unit)]
         unit_delta = np.timedelta64(delta_ns, "ns")
-        diffs = unique_timedeltas / unit_delta
-        if np.all(diffs == diffs.astype(int)):
+        if np.all(unique_timedeltas % unit_delta == np.timedelta64(0, "ns")):
             return time_unit
     return "seconds"
 

From 3ff3cd8cadcf7450f9a4dd627b780a88023605a0 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sun, 13 Dec 2020 15:33:33 -0500
Subject: [PATCH 15/18] Use pandas for divisiblity check for older NumPy
 compatibility

---
 xarray/coding/times.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 8ba1ef13d7e..5a307a28542 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -263,6 +263,7 @@ def decode_cf_timedelta(num_timedeltas, units):
 
 
 def _infer_time_units_from_diff(unique_timedeltas):
+    unique_deltas_as_index = pd.TimedeltaIndex(unique_timedeltas)
     for time_unit in [
         "days",
         "hours",
@@ -273,8 +274,8 @@ def _infer_time_units_from_diff(unique_timedeltas):
         "nanoseconds",
     ]:
         delta_ns = _NS_PER_TIME_DELTA[_netcdf_to_numpy_timeunit(time_unit)]
-        unit_delta = np.timedelta64(delta_ns, "ns")
-        if np.all(unique_timedeltas % unit_delta == np.timedelta64(0, "ns")):
+        unit_delta = pd.to_timedelta(delta_ns, "ns")
+        if np.all(unique_deltas_as_index % unit_delta == np.timedelta64(0, "ns")):
             return time_unit
     return "seconds"
 

From 367500874fd394b544e98bcd46aedf729f860225 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sun, 13 Dec 2020 18:07:25 -0500
Subject: [PATCH 16/18] Reduce changes needed; improve comments

---
 xarray/coding/times.py            | 13 +++++++---
 xarray/tests/test_coding_times.py | 42 ++++++++-----------------------
 2 files changed, 19 insertions(+), 36 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 5a307a28542..3d104ede2a4 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -163,12 +163,12 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
 
     # To avoid integer overflow when converting to nanosecond units for integer
     # dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64
-    # (GH #2002).
+    # (GH 2002).
     if flat_num_dates.dtype.kind == "i":
         flat_num_dates = flat_num_dates.astype(np.int64)
 
     # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
-    # works much faster when dealing with integers (GH #1399).
+    # works much faster when dealing with integers (GH 1399).
     flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
         np.int64
     )
@@ -263,6 +263,11 @@ def decode_cf_timedelta(num_timedeltas, units):
 
 
 def _infer_time_units_from_diff(unique_timedeltas):
+    # Note that the modulus operator was only implemented for np.timedelta64
+    # arrays as of NumPy version 1.16.0.  Once our minimum version of NumPy
+    # supported is greater than or equal to this we will no longer need to cast
+    # unique_timedeltas to a TimedeltaIndex.  In the meantime, however, the
+    # modulus operator works for TimedeltaIndex objects.
     unique_deltas_as_index = pd.TimedeltaIndex(unique_timedeltas)
     for time_unit in [
         "days",
@@ -274,7 +279,7 @@ def _infer_time_units_from_diff(unique_timedeltas):
         "nanoseconds",
     ]:
         delta_ns = _NS_PER_TIME_DELTA[_netcdf_to_numpy_timeunit(time_unit)]
-        unit_delta = pd.to_timedelta(delta_ns, "ns")
+        unit_delta = np.timedelta64(delta_ns, "ns")
         if np.all(unique_deltas_as_index % unit_delta == np.timedelta64(0, "ns")):
             return time_unit
     return "seconds"
@@ -439,7 +444,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
         time_deltas = dates_as_index - ref_date
 
         # Use floor division if time_delta evenly divides all differences
-        # to preserve integer dtype if possible.
+        # to preserve integer dtype if possible (GH 4045).
         if np.all(time_deltas % time_delta == np.timedelta64(0, delta_units)):
             num = time_deltas // time_delta
         else:
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 93ac9b12521..dfd558f737e 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -479,7 +479,7 @@ def test_decoded_cf_datetime_array_2d():
     assert_array_equal(np.asarray(result), expected)
 
 
-PANDAS_FREQUENCIES_TO_ENCODING_UNITS = {
+FREQUENCIES_TO_ENCODING_UNITS = {
     "N": "nanoseconds",
     "U": "microseconds",
     "L": "milliseconds",
@@ -490,9 +490,7 @@ def test_decoded_cf_datetime_array_2d():
 }
 
 
-@pytest.mark.parametrize(
-    ("freq", "units"), PANDAS_FREQUENCIES_TO_ENCODING_UNITS.items()
-)
+@pytest.mark.parametrize(("freq", "units"), FREQUENCIES_TO_ENCODING_UNITS.items())
 def test_infer_datetime_units(freq, units):
     dates = pd.date_range("2000", periods=2, freq=freq)
     expected = f"{units} since 2000-01-01 00:00:00"
@@ -972,45 +970,25 @@ def test_decode_ambiguous_time_warns(calendar):
     np.testing.assert_array_equal(result, expected)
 
 
-ENCODING_UNITS = [
-    "days",
-    "hours",
-    "minutes",
-    "seconds",
-    "milliseconds",
-    "microseconds",
-    "nanoseconds",
-]
-NUMPY_FREQUENCIES = [
-    np.timedelta64(1, "D"),
-    np.timedelta64(1, "h"),
-    np.timedelta64(1, "m"),
-    np.timedelta64(1, "s"),
-    np.timedelta64(1, "ms"),
-    np.timedelta64(1, "us"),
-    np.timedelta64(1, "ns"),
-]
-
-
-@pytest.mark.parametrize("encoding_units", ENCODING_UNITS)
-@pytest.mark.parametrize("data_freq", NUMPY_FREQUENCIES, ids=lambda x: f"{x!r}")
-def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, data_freq):
-    times = pd.date_range("2000", periods=3, freq=pd.to_timedelta(data_freq))
+@pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values())
+@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
+def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq):
+    times = pd.date_range("2000", periods=3, freq=freq)
     units = f"{encoding_units} since 2000-01-01"
     encoded, _, _ = coding.times.encode_cf_datetime(times, units)
 
     numpy_timeunit = coding.times._netcdf_to_numpy_timeunit(encoding_units)
     encoding_units_as_timedelta = np.timedelta64(1, numpy_timeunit)
-    if data_freq >= encoding_units_as_timedelta:
+    if pd.to_timedelta(1, freq) >= encoding_units_as_timedelta:
         assert encoded.dtype == np.int64
     else:
         assert encoded.dtype == np.float64
 
 
-@pytest.mark.parametrize("freq", ["N", "U", "L", "S", "T", "H", "D"])
+@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
 def test_encode_decode_roundtrip(freq):
-    # See GH #4045. Prior to #4684 this test would fail for frequencies of "S",
-    # "L", "U", and "N".
+    # See GH 4045. Prior to GH 4684 this test would fail for frequencies of
+    # "S", "L", "U", and "N".
     initial_time = pd.date_range("1678-01-01", periods=1)
     times = initial_time.append(pd.date_range("1968", periods=2, freq=freq))
     variable = Variable(["time"], times)

From ed2bce65180316b340102d7ad5529e8dd1712297 Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Sun, 13 Dec 2020 18:12:00 -0500
Subject: [PATCH 17/18] Checking remainder against zero nanoseconds is more
 straightforward

It probably doesn't really matter though.
---
 xarray/coding/times.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 3d104ede2a4..3d877a169f5 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -445,7 +445,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
 
         # Use floor division if time_delta evenly divides all differences
         # to preserve integer dtype if possible (GH 4045).
-        if np.all(time_deltas % time_delta == np.timedelta64(0, delta_units)):
+        if np.all(time_deltas % time_delta == np.timedelta64(0, "ns")):
             num = time_deltas // time_delta
         else:
             num = time_deltas / time_delta

From 450037de70a040cfc4bb2ee0786e5ad88506750d Mon Sep 17 00:00:00 2001
From: Spencer Clark <spencerkclark@gmail.com>
Date: Wed, 16 Dec 2020 06:17:20 -0500
Subject: [PATCH 18/18] Add a note to the breaking changes section

---
 doc/whats-new.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ceeb4d16c56..60d1de94506 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -22,6 +22,11 @@ v0.16.3 (unreleased)
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
+- As a result of :pull:`4684` the default units encoding for
+  datetime-like values (``np.datetime64[ns]`` or ``cftime.datetime``) will now
+  always be set such that ``int64`` values can be used.  In the past, no units
+  finer than "seconds" were chosen, which would sometimes mean that ``float64``
+  values were required, which would lead to inaccurate I/O round-trips.
 
 
 New Features