From 239309f881ba0d7e02280147bc443e6e286e6a63 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 16 Apr 2024 10:53:41 -0400 Subject: [PATCH] Convert 360_day calendars by choosing random dates to drop or add (#8603) * Convert 360 calendar randomly * add note to whats new * add pull number to whats new entry * run pre-commit * Change test to use recommended freq * Apply suggestions from code review Co-authored-by: Spencer Clark * Fix merge - remove rng arg --------- Co-authored-by: Spencer Clark --- doc/whats-new.rst | 2 ++ xarray/coding/calendar_ops.py | 53 ++++++++++++++++++++++++++----- xarray/tests/test_calendar_ops.py | 39 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 41b39bf2b1a..24dcb8c9687 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,8 @@ v2024.04.0 (unreleased) New Features ~~~~~~~~~~~~ +- New "random" method for converting to and from 360_day calendars (:pull:`8603`). + By `Pascal Bourgault `_. Breaking changes diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index dc2f95b832e..c4fe9e1f4ae 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -64,7 +64,7 @@ def convert_calendar( The target calendar name. dim : str Name of the time coordinate in the input DataArray or Dataset. - align_on : {None, 'date', 'year'} + align_on : {None, 'date', 'year', 'random'} Must be specified when either the source or target is a `"360_day"` calendar; ignored otherwise. See Notes. missing : any, optional @@ -143,6 +143,16 @@ def convert_calendar( will be dropped as there are no equivalent dates in a standard calendar. This option is best used with data on a frequency coarser than daily. + + "random" + Similar to "year", each day of year of the source is mapped to another day of year + of the target. However, instead of having always the same missing days according + the source and target years, here 5 days are chosen randomly, one for each fifth + of the year. However, February 29th is always missing when converting to a leap year, + or its value is dropped when converting from a leap year. This is similar to the method + used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1). + + This option is best used on daily data. """ from xarray.core.dataarray import DataArray @@ -174,14 +184,20 @@ def convert_calendar( out = obj.copy() - if align_on == "year": + if align_on in ["year", "random"]: # Special case for conversion involving 360_day calendar - # Instead of translating dates directly, this tries to keep the position within a year similar. - - new_doy = time.groupby(f"{dim}.year").map( - _interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime - ) - + if align_on == "year": + # Instead of translating dates directly, this tries to keep the position within a year similar. + new_doy = time.groupby(f"{dim}.year").map( + _interpolate_day_of_year, + target_calendar=calendar, + use_cftime=use_cftime, + ) + elif align_on == "random": + # The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year. + new_doy = time.groupby(f"{dim}.year").map( + _random_day_of_year, target_calendar=calendar, use_cftime=use_cftime + ) # Convert the source datetimes, but override the day of year with our new day of years. out[dim] = DataArray( [ @@ -229,6 +245,27 @@ def _interpolate_day_of_year(time, target_calendar, use_cftime): ).astype(int) +def _random_day_of_year(time, target_calendar, use_cftime): + """Return a day of year in the new calendar. + + Removes Feb 29th and five other days chosen randomly within five sections of 72 days. + """ + year = int(time.dt.year[0]) + source_calendar = time.dt.calendar + new_doy = np.arange(360) + 1 + rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5) + if source_calendar == "360_day": + for idx in rm_idx: + new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 + if _days_in_year(year, target_calendar, use_cftime) == 366: + new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1 + elif target_calendar == "360_day": + new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) + if _days_in_year(year, source_calendar, use_cftime) == 366: + new_doy = np.insert(new_doy, 60, -1) + return new_doy[time.dt.dayofyear - 1] + + def _convert_to_new_calendar_with_new_day_of_year( date, day_of_year, calendar, use_cftime ): diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index d2792034876..5a57083fd22 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -106,6 +106,45 @@ def test_convert_calendar_360_days(source, target, freq, align_on): assert conv.size == 359 if freq == "D" else 359 * 4 +def test_convert_calendar_360_days_random(): + da_std = DataArray( + np.linspace(0, 1, 366), + dims=("time",), + coords={ + "time": date_range( + "2004-01-01", + "2004-12-31", + freq="D", + calendar="standard", + use_cftime=False, + ) + }, + ) + da_360 = DataArray( + np.linspace(0, 1, 360), + dims=("time",), + coords={ + "time": date_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day") + }, + ) + + conv = convert_calendar(da_std, "360_day", align_on="random") + conv2 = convert_calendar(da_std, "360_day", align_on="random") + assert (conv != conv2).any() + + conv = convert_calendar(da_360, "standard", use_cftime=False, align_on="random") + assert np.datetime64("2004-02-29") not in conv.time + conv2 = convert_calendar(da_360, "standard", use_cftime=False, align_on="random") + assert (conv2 != conv).any() + + # Ensure that added days are evenly distributed in the 5 fifths of each year + conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.NaN) + conv = conv.where(conv.isnull(), drop=True) + nandoys = conv.time.dt.dayofyear[:366] + assert all(nandoys < np.array([74, 147, 220, 293, 366])) + assert all(nandoys > np.array([0, 73, 146, 219, 292])) + + @requires_cftime @pytest.mark.parametrize( "source,target,freq",