From 8655920a928fb46c7df1cfc718331cc1ad7a927e Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 3 Jan 2021 19:26:43 -0500 Subject: [PATCH 01/10] Enable exact roundtripping of cftime dates --- doc/whats-new.rst | 8 +++++++- xarray/coding/cftime_offsets.py | 28 +++++++++++++++++++++++++++ xarray/coding/times.py | 2 +- xarray/tests/__init__.py | 1 + xarray/tests/test_cftime_offsets.py | 30 +++++++++++++++++++++++++++++ xarray/tests/test_coding_times.py | 27 ++++++++++++++++++-------- 6 files changed, 86 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 63785d72179..a0255827de8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,13 @@ Breaking changes New Features ~~~~~~~~~~~~ - +- Xarray now leverages updates as of cftime version 1.2.0 which enable exact I/O + rountripping of ``cftime.datetime`` objects (:pull:`XXXX`). + By `Spencer Clark `_. +- :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support + millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies + for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`XXXX`). + By `Spencer Clark `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 3c92c816e12..d39d0618072 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -576,6 +576,26 @@ def __apply__(self, other): return other + self.as_timedelta() +class Millisecond(BaseCFTimeOffset): + _freq = "L" + + def as_timedelta(self): + return timedelta(milliseconds=self.n) + + def __apply__(self, other): + return other + self.as_timedelta() + + +class Microsecond(BaseCFTimeOffset): + _freq = "U" + + def as_timedelta(self): + return timedelta(microseconds=self.n) + + def __apply__(self, other): + return other + self.as_timedelta() + + _FREQUENCIES = { "A": YearEnd, "AS": YearBegin, @@ -590,6 +610,10 @@ def __apply__(self, other): "T": Minute, "min": Minute, "S": Second, + "L": Millisecond, + "ms": Millisecond, + "U": Microsecond, + "us": Microsecond, "AS-JAN": partial(YearBegin, month=1), "AS-FEB": partial(YearBegin, month=2), "AS-MAR": partial(YearBegin, month=3), @@ -856,6 +880,10 @@ def cftime_range( +--------+--------------------------+ | S | Second frequency | +--------+--------------------------+ + | L, ms | Millisecond frequency | + +--------+--------------------------+ + | U, us | Microsecond frequency | + +--------+--------------------------+ Any multiples of the following anchored offsets are also supported. diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 3d877a169f5..a8d1e67a0dd 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -226,7 +226,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = cftime_to_nptime(dates) elif use_cftime: dates = _decode_datetime_with_cftime( - flat_num_dates.astype(float), units, calendar + flat_num_dates, units, calendar ) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 7c18f1a8c8a..5f113f48665 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,6 +68,7 @@ def LooseVersion(vstring): has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0") +has_cftime_1_2_0, requires_cftime_1_2_0 = _importorskip("cftime", minversion="1.2.0.0") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 3efcf8039c6..2c55728ceb6 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -16,6 +16,8 @@ QuarterBegin, QuarterEnd, Second, + Millisecond, + Microsecond, YearBegin, YearEnd, _days_in_month, @@ -181,6 +183,14 @@ def test_to_offset_offset_input(offset): ("2min", Minute(n=2)), ("S", Second()), ("2S", Second(n=2)), + ("L", Millisecond(n=1)), + ("2L", Millisecond(n=2)), + ("ms", Millisecond(n=1)), + ("2ms", Millisecond(n=2)), + ("U", Microsecond(n=1)), + ("2U", Microsecond(n=2)), + ("us", Microsecond(n=1)), + ("2us", Microsecond(n=2)) ], ids=_id_func, ) @@ -299,6 +309,8 @@ def test_to_cftime_datetime_error_type_error(): Hour(), Minute(), Second(), + Millisecond(), + Microsecond(), ] _EQ_TESTS_B = [ BaseCFTimeOffset(n=2), @@ -316,6 +328,8 @@ def test_to_cftime_datetime_error_type_error(): Hour(n=2), Minute(n=2), Second(n=2), + Millisecond(n=2), + Microsecond(n=2), ] @@ -340,6 +354,8 @@ def test_neq(a, b): Hour(n=2), Minute(n=2), Second(n=2), + Millisecond(n=2), + Microsecond(n=2), ] @@ -360,6 +376,8 @@ def test_eq(a, b): (Hour(), Hour(n=3)), (Minute(), Minute(n=3)), (Second(), Second(n=3)), + (Millisecond(), Millisecond(n=3)), + (Microsecond(), Microsecond(n=3)) ] @@ -387,6 +405,8 @@ def test_rmul(offset, expected): (Hour(), Hour(n=-1)), (Minute(), Minute(n=-1)), (Second(), Second(n=-1)), + (Millisecond(), Millisecond(n=-1)), + (Microsecond(), Microsecond(n=-1)) ], ids=_id_func, ) @@ -399,6 +419,8 @@ def test_neg(offset, expected): (Hour(n=2), (1, 1, 1, 2)), (Minute(n=2), (1, 1, 1, 0, 2)), (Second(n=2), (1, 1, 1, 0, 0, 2)), + (Millisecond(n=2), (1, 1, 1, 0, 0, 0, 2000)), + (Microsecond(n=2), (1, 1, 1, 0, 0, 0, 2)) ] @@ -427,6 +449,8 @@ def test_radd_sub_monthly(offset, expected_date_args, calendar): (Hour(n=2), (1, 1, 2, 22)), (Minute(n=2), (1, 1, 2, 23, 58)), (Second(n=2), (1, 1, 2, 23, 59, 58)), + (Millisecond(n=2), (1, 1, 2, 23, 59, 59, 998000)), + (Microsecond(n=2), (1, 1, 2, 23, 59, 59, 999998)) ], ids=_id_func, ) @@ -802,6 +826,8 @@ def test_add_quarter_end_onOffset( ((1, 1, 1), Hour(), True), ((1, 1, 1), Minute(), True), ((1, 1, 1), Second(), True), + ((1, 1, 1), Millisecond(), True), + ((1, 1, 1), Microsecond(), True) ], ids=_id_func, ) @@ -865,6 +891,8 @@ def test_onOffset_month_or_quarter_or_year_end( (Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)), (Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)), (Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), + (Millisecond(), (1, 3, 2, 1, 1, 1, 1000), (1, 3, 2, 1, 1, 1, 1000)), + (Microsecond(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), ], ids=_id_func, ) @@ -914,6 +942,8 @@ def test_rollforward(calendar, offset, initial_date_args, partial_expected_date_ (Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)), (Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)), (Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), + (Millisecond(), (1, 3, 2, 1, 1, 1, 1000), (1, 3, 2, 1, 1, 1, 1000)), + (Microsecond(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), ], ids=_id_func, ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index dfd558f737e..1489b110932 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -6,7 +6,7 @@ import pytest from pandas.errors import OutOfBoundsDatetime -from xarray import DataArray, Dataset, Variable, coding, conventions, decode_cf +from xarray import DataArray, Dataset, Variable, cftime_range, coding, conventions, decode_cf from xarray.coding.times import ( cftime_to_nptime, decode_cf_datetime, @@ -18,7 +18,7 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal -from . import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask +from . import arm_xfail, assert_array_equal, has_cftime, has_cftime_1_2_0, requires_cftime, requires_dask _NON_STANDARD_CALENDARS_SET = { "noleap", @@ -972,8 +972,13 @@ def test_decode_ambiguous_time_warns(calendar): @pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values()) @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) -def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq): - times = pd.date_range("2000", periods=3, freq=freq) +@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) +def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq, date_range): + if not has_cftime and date_range == cftime_range: + pytest.skip("Test requires cftime.") + if (freq == "N" or encoding_units == "nanoseconds") and date_range == cftime_range: + pytest.skip("Nanosecond frequency is not valid for cftime dates.") + times = date_range("2000", periods=3, freq=freq) units = f"{encoding_units} since 2000-01-01" encoded, _, _ = coding.times.encode_cf_datetime(times, units) @@ -986,12 +991,18 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq): @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) -def test_encode_decode_roundtrip(freq): +@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) +def test_encode_decode_roundtrip(freq, date_range): # See GH 4045. Prior to GH 4684 this test would fail for frequencies of # "S", "L", "U", and "N". - initial_time = pd.date_range("1678-01-01", periods=1) - times = initial_time.append(pd.date_range("1968", periods=2, freq=freq)) + if not has_cftime_1_2_0 and date_range == cftime_range: + pytest.skip("Exact roundtripping requires cftime 1.2.0.") + if freq == "N" and date_range == cftime_range: + pytest.skip("Nanosecond frequency is not valid for cftime dates.") + initial_time = date_range("1678-01-01", periods=1) + times = initial_time.append(date_range("1968", periods=2, freq=freq)) variable = Variable(["time"], times) encoded = conventions.encode_cf_variable(variable) - decoded = conventions.decode_cf_variable("time", encoded) + use_cftime = date_range == cftime_range + decoded = conventions.decode_cf_variable("time", encoded, use_cftime=use_cftime) assert_equal(variable, decoded) From e159034c7b4185f695d458bc9fe66500a4053e48 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 3 Jan 2021 19:27:46 -0500 Subject: [PATCH 02/10] black --- xarray/coding/times.py | 4 +--- xarray/tests/test_cftime_offsets.py | 12 ++++++------ xarray/tests/test_coding_times.py | 19 +++++++++++++++++-- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index a8d1e67a0dd..77e4e55dea3 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -225,9 +225,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if calendar in _STANDARD_CALENDARS: dates = cftime_to_nptime(dates) elif use_cftime: - dates = _decode_datetime_with_cftime( - flat_num_dates, units, calendar - ) + dates = _decode_datetime_with_cftime(flat_num_dates, units, calendar) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 2c55728ceb6..3fa793a0c8d 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -190,7 +190,7 @@ def test_to_offset_offset_input(offset): ("U", Microsecond(n=1)), ("2U", Microsecond(n=2)), ("us", Microsecond(n=1)), - ("2us", Microsecond(n=2)) + ("2us", Microsecond(n=2)), ], ids=_id_func, ) @@ -377,7 +377,7 @@ def test_eq(a, b): (Minute(), Minute(n=3)), (Second(), Second(n=3)), (Millisecond(), Millisecond(n=3)), - (Microsecond(), Microsecond(n=3)) + (Microsecond(), Microsecond(n=3)), ] @@ -406,7 +406,7 @@ def test_rmul(offset, expected): (Minute(), Minute(n=-1)), (Second(), Second(n=-1)), (Millisecond(), Millisecond(n=-1)), - (Microsecond(), Microsecond(n=-1)) + (Microsecond(), Microsecond(n=-1)), ], ids=_id_func, ) @@ -420,7 +420,7 @@ def test_neg(offset, expected): (Minute(n=2), (1, 1, 1, 0, 2)), (Second(n=2), (1, 1, 1, 0, 0, 2)), (Millisecond(n=2), (1, 1, 1, 0, 0, 0, 2000)), - (Microsecond(n=2), (1, 1, 1, 0, 0, 0, 2)) + (Microsecond(n=2), (1, 1, 1, 0, 0, 0, 2)), ] @@ -450,7 +450,7 @@ def test_radd_sub_monthly(offset, expected_date_args, calendar): (Minute(n=2), (1, 1, 2, 23, 58)), (Second(n=2), (1, 1, 2, 23, 59, 58)), (Millisecond(n=2), (1, 1, 2, 23, 59, 59, 998000)), - (Microsecond(n=2), (1, 1, 2, 23, 59, 59, 999998)) + (Microsecond(n=2), (1, 1, 2, 23, 59, 59, 999998)), ], ids=_id_func, ) @@ -827,7 +827,7 @@ def test_add_quarter_end_onOffset( ((1, 1, 1), Minute(), True), ((1, 1, 1), Second(), True), ((1, 1, 1), Millisecond(), True), - ((1, 1, 1), Microsecond(), True) + ((1, 1, 1), Microsecond(), True), ], ids=_id_func, ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 1489b110932..6653577c88d 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -6,7 +6,15 @@ import pytest from pandas.errors import OutOfBoundsDatetime -from xarray import DataArray, Dataset, Variable, cftime_range, coding, conventions, decode_cf +from xarray import ( + DataArray, + Dataset, + Variable, + cftime_range, + coding, + conventions, + decode_cf, +) from xarray.coding.times import ( cftime_to_nptime, decode_cf_datetime, @@ -18,7 +26,14 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal -from . import arm_xfail, assert_array_equal, has_cftime, has_cftime_1_2_0, requires_cftime, requires_dask +from . import ( + arm_xfail, + assert_array_equal, + has_cftime, + has_cftime_1_2_0, + requires_cftime, + requires_dask, +) _NON_STANDARD_CALENDARS_SET = { "noleap", From 396f20ed6b773dc91a1d70632932135503f1b050 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 3 Jan 2021 19:44:52 -0500 Subject: [PATCH 03/10] Recommend at least cftime 1.2.1 for exact roundtripping It includes a minor bug fix to the changes introduced in 1.2.0. --- doc/whats-new.rst | 2 +- xarray/tests/__init__.py | 2 +- xarray/tests/test_coding_times.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a0255827de8..4300f5d5aa2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,7 @@ Breaking changes New Features ~~~~~~~~~~~~ -- Xarray now leverages updates as of cftime version 1.2.0 which enable exact I/O +- Xarray now leverages updates as of cftime version 1.2.1 which enable exact I/O rountripping of ``cftime.datetime`` objects (:pull:`XXXX`). By `Spencer Clark `_. - :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 5f113f48665..d8aec82e27d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,7 +68,7 @@ def LooseVersion(vstring): has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0") -has_cftime_1_2_0, requires_cftime_1_2_0 = _importorskip("cftime", minversion="1.2.0.0") +has_cftime_1_2_1, requires_cftime_1_2_1 = _importorskip("cftime", minversion="1.2.1.0") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 6653577c88d..6516c623800 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -30,7 +30,7 @@ arm_xfail, assert_array_equal, has_cftime, - has_cftime_1_2_0, + has_cftime_1_2_1, requires_cftime, requires_dask, ) @@ -1010,8 +1010,8 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq, date def test_encode_decode_roundtrip(freq, date_range): # See GH 4045. Prior to GH 4684 this test would fail for frequencies of # "S", "L", "U", and "N". - if not has_cftime_1_2_0 and date_range == cftime_range: - pytest.skip("Exact roundtripping requires cftime 1.2.0.") + if not has_cftime_1_2_1 and date_range == cftime_range: + pytest.skip("Exact roundtripping requires cftime 1.2.1.") if freq == "N" and date_range == cftime_range: pytest.skip("Nanosecond frequency is not valid for cftime dates.") initial_time = date_range("1678-01-01", periods=1) From 6e94f9525fb1ed68dba8c153418fd0bf06b6d34b Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 3 Jan 2021 19:55:27 -0500 Subject: [PATCH 04/10] Add PR number in what's new --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4300f5d5aa2..e8504fe2547 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,11 +34,11 @@ Breaking changes New Features ~~~~~~~~~~~~ - Xarray now leverages updates as of cftime version 1.2.1 which enable exact I/O - rountripping of ``cftime.datetime`` objects (:pull:`XXXX`). + rountripping of ``cftime.datetime`` objects (:pull:`4758`). By `Spencer Clark `_. - :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies - for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`XXXX`). + for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`4758`). By `Spencer Clark `_. Bug fixes From acf16a63d2992e376167460710f5271aa9d9d966 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 29 Jan 2021 09:03:39 -0500 Subject: [PATCH 05/10] Progress --- xarray/coding/times.py | 38 +++++++++++++++++++++++++++---- xarray/tests/test_coding_times.py | 24 +++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 77e4e55dea3..2adaaec22b2 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1,6 +1,6 @@ import re import warnings -from datetime import datetime +from datetime import datetime, timedelta from distutils.version import LooseVersion from functools import partial @@ -35,6 +35,16 @@ "D": int(1e9) * 60 * 60 * 24, } +_US_PER_TIME_DELTA = { + "microseconds": 1, + "milliseconds": 1_000, + "seconds": 1_000_000, + "minutes": 60 * 1_000_000, + "hours": 60 * 60 * 1_000_000, + "days": 24 * 60 * 60 * 1_000_000, +} + + TIME_UNITS = frozenset( [ "days", @@ -266,7 +276,7 @@ def _infer_time_units_from_diff(unique_timedeltas): # supported is greater than or equal to this we will no longer need to cast # unique_timedeltas to a TimedeltaIndex. In the meantime, however, the # modulus operator works for TimedeltaIndex objects. - unique_deltas_as_index = pd.TimedeltaIndex(unique_timedeltas) + unique_deltas_as_index = unique_timedeltas #pd.TimedeltaIndex(unique_timedeltas) for time_unit in [ "days", "hours", @@ -283,6 +293,22 @@ def _infer_time_units_from_diff(unique_timedeltas): return "seconds" +def _infer_time_units_from_diff_datetime_timedelta(unique_timedeltas): + for time_unit in [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + ]: + delta_us = _US_PER_TIME_DELTA[time_unit] + unit_delta = timedelta(microseconds=delta_us) + if np.all(unique_timedeltas % unit_delta == timedelta(microseconds=0)): + return time_unit + return "seconds" + + def infer_calendar_name(dates): """Given an array of datetimes, infer the CF calendar name""" if np.asarray(dates).dtype == "datetime64[ns]": @@ -308,10 +334,12 @@ def infer_datetime_units(dates): reference_date = format_cftime_datetime(reference_date) unique_timedeltas = np.unique(np.diff(dates)) if unique_timedeltas.dtype == np.dtype("O"): + units = _infer_time_units_from_diff_datetime_timedelta(unique_timedeltas) # Convert to np.timedelta64 objects using pandas to work around a # NumPy casting bug: https://github.com/numpy/numpy/issues/11096 - unique_timedeltas = to_timedelta_unboxed(unique_timedeltas) - units = _infer_time_units_from_diff(unique_timedeltas) + # unique_timedeltas = to_timedelta_unboxed(unique_timedeltas) + else: + units = _infer_time_units_from_diff(unique_timedeltas) return f"{units} since {reference_date}" @@ -452,7 +480,9 @@ def encode_cf_datetime(dates, units=None, calendar=None): except (OutOfBoundsDatetime, OverflowError): num = _encode_datetime_with_cftime(dates, units, calendar) + print(num) num = cast_to_int_if_safe(num) + print(num) return (num, units, calendar) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 6516c623800..85d0233b73a 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1014,6 +1014,8 @@ def test_encode_decode_roundtrip(freq, date_range): pytest.skip("Exact roundtripping requires cftime 1.2.1.") if freq == "N" and date_range == cftime_range: pytest.skip("Nanosecond frequency is not valid for cftime dates.") + # if ON_WINDOWS and freq == "U" and date_range == cftime_range: + # pytest.skip("Windows timedeltas use int32 instead of int64 so have a more limited range") initial_time = date_range("1678-01-01", periods=1) times = initial_time.append(date_range("1968", periods=2, freq=freq)) variable = Variable(["time"], times) @@ -1021,3 +1023,25 @@ def test_encode_decode_roundtrip(freq, date_range): use_cftime = date_range == cftime_range decoded = conventions.decode_cf_variable("time", encoded, use_cftime=use_cftime) assert_equal(variable, decoded) + +@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) +@pytest.mark.parametrize("date_range", [cftime_range]) +def test_encode_decode_roundtrip_cftime(freq, date_range): + # See GH 4045. Prior to GH 4684 this test would fail for frequencies of + # "S", "L", "U", and "N". + from datetime import timedelta + if not has_cftime_1_2_1 and date_range == cftime_range: + pytest.skip("Exact roundtripping requires cftime 1.2.1.") + if freq == "N" and date_range == cftime_range: + pytest.skip("Nanosecond frequency is not valid for cftime dates.") + # if ON_WINDOWS and freq == "U" and date_range == cftime_range: + # pytest.skip("Windows timedeltas use int32 instead of int64 so have a more limited range") + initial_time = date_range("0001", periods=1) + times = initial_time.append(date_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365)) + variable = Variable(["time"], times) + encoded = conventions.encode_cf_variable(variable) + use_cftime = date_range == cftime_range + print(times) + print(encoded) + decoded = conventions.decode_cf_variable("time", encoded, use_cftime=use_cftime) + assert_equal(variable, decoded) From 94f1a504f32c2d4e711998f9393b3386ac7ddcbd Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 6 Feb 2021 13:03:15 -0500 Subject: [PATCH 06/10] Updates --- doc/whats-new.rst | 2 +- xarray/coding/times.py | 78 ++++++++++++++----------------- xarray/tests/__init__.py | 2 +- xarray/tests/test_coding_times.py | 48 +++++++------------ 4 files changed, 55 insertions(+), 75 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e8504fe2547..51a8f4052e2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,7 @@ Breaking changes New Features ~~~~~~~~~~~~ -- Xarray now leverages updates as of cftime version 1.2.1 which enable exact I/O +- Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O rountripping of ``cftime.datetime`` objects (:pull:`4758`). By `Spencer Clark `_. - :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 2adaaec22b2..94bffc2ad0e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -44,6 +44,16 @@ "days": 24 * 60 * 60 * 1_000_000, } +_NETCDF_TIME_UNITS_CFTIME = [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", +] + +_NETCDF_TIME_UNITS_NUMPY = _NETCDF_TIME_UNITS_CFTIME + ["nanoseconds"] TIME_UNITS = frozenset( [ @@ -270,41 +280,33 @@ def decode_cf_timedelta(num_timedeltas, units): return result.reshape(num_timedeltas.shape) -def _infer_time_units_from_diff(unique_timedeltas): - # Note that the modulus operator was only implemented for np.timedelta64 - # arrays as of NumPy version 1.16.0. Once our minimum version of NumPy - # supported is greater than or equal to this we will no longer need to cast - # unique_timedeltas to a TimedeltaIndex. In the meantime, however, the - # modulus operator works for TimedeltaIndex objects. - unique_deltas_as_index = unique_timedeltas #pd.TimedeltaIndex(unique_timedeltas) - for time_unit in [ - "days", - "hours", - "minutes", - "seconds", - "milliseconds", - "microseconds", - "nanoseconds", - ]: - delta_ns = _NS_PER_TIME_DELTA[_netcdf_to_numpy_timeunit(time_unit)] - unit_delta = np.timedelta64(delta_ns, "ns") - if np.all(unique_deltas_as_index % unit_delta == np.timedelta64(0, "ns")): - return time_unit - return "seconds" +def _unit_timedelta_cftime(units): + return timedelta(microseconds=_US_PER_TIME_DELTA[units]) -def _infer_time_units_from_diff_datetime_timedelta(unique_timedeltas): - for time_unit in [ - "days", - "hours", - "minutes", - "seconds", - "milliseconds", - "microseconds", - ]: - delta_us = _US_PER_TIME_DELTA[time_unit] - unit_delta = timedelta(microseconds=delta_us) - if np.all(unique_timedeltas % unit_delta == timedelta(microseconds=0)): +def _unit_timedelta_numpy(units): + numpy_units = _netcdf_to_numpy_timeunit(units) + return np.timedelta64(_NS_PER_TIME_DELTA[numpy_units], "ns") + + +def _infer_time_units_from_diff(unique_timedeltas): + if unique_timedeltas.dtype == np.dtype("O"): + time_units = _NETCDF_TIME_UNITS_CFTIME + unit_timedelta = _unit_timedelta_cftime + zero_timedelta = timedelta(microseconds=0) + timedeltas = unique_timedeltas + else: + time_units = _NETCDF_TIME_UNITS_NUMPY + unit_timedelta = _unit_timedelta_numpy + zero_timedelta = np.timedelta64(0, "ns") + # Note that the modulus operator was only implemented for np.timedelta64 + # arrays as of NumPy version 1.16.0. Once our minimum version of NumPy + # supported is greater than or equal to this we will no longer need to cast + # unique_timedeltas to a TimedeltaIndex. In the meantime, however, the + # modulus operator works for TimedeltaIndex objects. + timedeltas = pd.TimedeltaIndex(unique_timedeltas) + for time_unit in time_units: + if np.all(timedeltas % unit_timedelta(time_unit) == zero_timedelta): return time_unit return "seconds" @@ -333,13 +335,7 @@ def infer_datetime_units(dates): reference_date = dates[0] if len(dates) > 0 else "1970-01-01" reference_date = format_cftime_datetime(reference_date) unique_timedeltas = np.unique(np.diff(dates)) - if unique_timedeltas.dtype == np.dtype("O"): - units = _infer_time_units_from_diff_datetime_timedelta(unique_timedeltas) - # Convert to np.timedelta64 objects using pandas to work around a - # NumPy casting bug: https://github.com/numpy/numpy/issues/11096 - # unique_timedeltas = to_timedelta_unboxed(unique_timedeltas) - else: - units = _infer_time_units_from_diff(unique_timedeltas) + units = _infer_time_units_from_diff(unique_timedeltas) return f"{units} since {reference_date}" @@ -480,9 +476,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): except (OutOfBoundsDatetime, OverflowError): num = _encode_datetime_with_cftime(dates, units, calendar) - print(num) num = cast_to_int_if_safe(num) - print(num) return (num, units, calendar) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index d8aec82e27d..a7761aefa3d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,7 +68,7 @@ def LooseVersion(vstring): has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0") -has_cftime_1_2_1, requires_cftime_1_2_1 = _importorskip("cftime", minversion="1.2.1.0") +has_cftime_1_4_1, requires_cftime_1_4_1 = _importorskip("cftime", minversion="1.4.1") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 85d0233b73a..31ec47d52a1 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -6,6 +6,8 @@ import pytest from pandas.errors import OutOfBoundsDatetime +from datetime import timedelta + from xarray import ( DataArray, Dataset, @@ -30,7 +32,7 @@ arm_xfail, assert_array_equal, has_cftime, - has_cftime_1_2_1, + has_cftime_1_4_1, requires_cftime, requires_dask, ) @@ -1006,42 +1008,26 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq, date @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) -@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) -def test_encode_decode_roundtrip(freq, date_range): +def test_encode_decode_roundtrip_datetime64(freq): # See GH 4045. Prior to GH 4684 this test would fail for frequencies of # "S", "L", "U", and "N". - if not has_cftime_1_2_1 and date_range == cftime_range: - pytest.skip("Exact roundtripping requires cftime 1.2.1.") - if freq == "N" and date_range == cftime_range: - pytest.skip("Nanosecond frequency is not valid for cftime dates.") - # if ON_WINDOWS and freq == "U" and date_range == cftime_range: - # pytest.skip("Windows timedeltas use int32 instead of int64 so have a more limited range") - initial_time = date_range("1678-01-01", periods=1) - times = initial_time.append(date_range("1968", periods=2, freq=freq)) + initial_time = pd.date_range("1678-01-01", periods=1) + times = initial_time.append(pd.date_range("1968", periods=2, freq=freq)) variable = Variable(["time"], times) encoded = conventions.encode_cf_variable(variable) - use_cftime = date_range == cftime_range - decoded = conventions.decode_cf_variable("time", encoded, use_cftime=use_cftime) + decoded = conventions.decode_cf_variable("time", encoded) assert_equal(variable, decoded) -@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) -@pytest.mark.parametrize("date_range", [cftime_range]) -def test_encode_decode_roundtrip_cftime(freq, date_range): - # See GH 4045. Prior to GH 4684 this test would fail for frequencies of - # "S", "L", "U", and "N". - from datetime import timedelta - if not has_cftime_1_2_1 and date_range == cftime_range: - pytest.skip("Exact roundtripping requires cftime 1.2.1.") - if freq == "N" and date_range == cftime_range: - pytest.skip("Nanosecond frequency is not valid for cftime dates.") - # if ON_WINDOWS and freq == "U" and date_range == cftime_range: - # pytest.skip("Windows timedeltas use int32 instead of int64 so have a more limited range") - initial_time = date_range("0001", periods=1) - times = initial_time.append(date_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365)) + +@pytest.mark.parametrize("freq", ["U", "L", "S", "T", "H", "D"]) +def test_encode_decode_roundtrip_cftime(freq): + if not has_cftime_1_4_1: + pytest.skip("Exact roundtripping requires at least cftime 1.4.1.") + initial_time = cftime_range("0001", periods=1) + times = initial_time.append( + cftime_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365) + ) variable = Variable(["time"], times) encoded = conventions.encode_cf_variable(variable) - use_cftime = date_range == cftime_range - print(times) - print(encoded) - decoded = conventions.decode_cf_variable("time", encoded, use_cftime=use_cftime) + decoded = conventions.decode_cf_variable("time", encoded, use_cftime=True) assert_equal(variable, decoded) From 451b1015430be113c55bc44dd0f7bea2a8657f57 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 6 Feb 2021 13:11:10 -0500 Subject: [PATCH 07/10] lint --- doc/whats-new.rst | 6 +++--- xarray/tests/test_cftime_offsets.py | 4 ++-- xarray/tests/test_coding_times.py | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b342156fef5..d03a893c11b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,11 +52,11 @@ Deprecations New Features ~~~~~~~~~~~~ -- Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O - rountripping of ``cftime.datetime`` objects (:pull:`4758`). +- Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O + rountripping of ``cftime.datetime`` objects (:pull:`4758`). By `Spencer Clark `_. - :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support - millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies + millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`4758`). By `Spencer Clark `_. - Significantly higher ``unstack`` performance on numpy-backed arrays which diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 3fa793a0c8d..32a01408604 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -10,14 +10,14 @@ BaseCFTimeOffset, Day, Hour, + Millisecond, + Microsecond, Minute, MonthBegin, MonthEnd, QuarterBegin, QuarterEnd, Second, - Millisecond, - Microsecond, YearBegin, YearEnd, _days_in_month, diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 31ec47d52a1..6093c13486e 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,4 +1,5 @@ import warnings +from datetime import timedelta from itertools import product import numpy as np @@ -6,8 +7,6 @@ import pytest from pandas.errors import OutOfBoundsDatetime -from datetime import timedelta - from xarray import ( DataArray, Dataset, From 6697a70a522d57fe912c95eb0464582174ddf14a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 7 Feb 2021 18:15:08 -0500 Subject: [PATCH 08/10] isort --- xarray/tests/test_cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 32a01408604..b1ecf059f2f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -10,8 +10,8 @@ BaseCFTimeOffset, Day, Hour, - Millisecond, Microsecond, + Millisecond, Minute, MonthBegin, MonthEnd, From 80f7022554b51e01bf39f62d4ca483ec55500434 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 7 Feb 2021 20:24:24 -0500 Subject: [PATCH 09/10] Update documentation --- xarray/coding/cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index c335a7f165e..c25d5296c41 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -848,7 +848,7 @@ def cftime_range( `ISO-8601 format `_. - It supports many, but not all, frequencies supported by ``pandas.date_range``. For example it does not currently support any of - the business-related, semi-monthly, or sub-second frequencies. + the business-related or semi-monthly frequencies. - Compound sub-monthly frequencies are not supported, e.g. '1H1min', as these can easily be written in terms of the finest common resolution, e.g. '61min'. From 725bcabb8c965f8829f2b82a245789eec0cbc0a6 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 10 Feb 2021 14:51:15 -0500 Subject: [PATCH 10/10] Some minor cleanups --- doc/whats-new.rst | 2 +- xarray/tests/test_coding_times.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a18da5bdcf6..ac326342115 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -53,7 +53,7 @@ Deprecations New Features ~~~~~~~~~~~~ - Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O - rountripping of ``cftime.datetime`` objects (:pull:`4758`). + roundtripping of ``cftime.datetime`` objects (:pull:`4758`). By `Spencer Clark `_. - :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 463fcc2748e..eda32d31148 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -34,6 +34,7 @@ has_cftime, has_cftime_1_4_1, requires_cftime, + requires_cftime_1_4_1, requires_dask, ) @@ -991,8 +992,8 @@ def test_decode_ambiguous_time_warns(calendar): @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) @pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq, date_range): - if not has_cftime and date_range == cftime_range: - pytest.skip("Test requires cftime.") + if not has_cftime_1_4_1 and date_range == cftime_range: + pytest.skip("Test requires cftime 1.4.1.") if (freq == "N" or encoding_units == "nanoseconds") and date_range == cftime_range: pytest.skip("Nanosecond frequency is not valid for cftime dates.") times = date_range("2000", periods=3, freq=freq) @@ -1019,10 +1020,9 @@ def test_encode_decode_roundtrip_datetime64(freq): assert_equal(variable, decoded) +@requires_cftime_1_4_1 @pytest.mark.parametrize("freq", ["U", "L", "S", "T", "H", "D"]) def test_encode_decode_roundtrip_cftime(freq): - if not has_cftime_1_4_1: - pytest.skip("Exact roundtripping requires at least cftime 1.4.1.") initial_time = cftime_range("0001", periods=1) times = initial_time.append( cftime_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365)