def _update_bounds_attributes(variables):
    """Adds time attributes to time bounds variables.

    Variables handling time bounds ("Cell boundaries" in the CF
    conventions) do not necessarily carry the necessary attributes to be
    decoded. This copies the attributes from the time variable to the
    associated boundaries.

    The update happens in place on the ``attrs`` of the bounds variables.
    Attributes already present on a bounds variable are never overwritten.

    Parameters
    ----------
    variables : mapping
        Maps variable names to objects exposing a mutable ``attrs``
        mapping.

    Returns
    -------
    None

    See Also:

    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
         cf-conventions.html#cell-boundaries

    https://github.com/pydata/xarray/issues/2565
    """
    # ``str`` plus ``unicode`` on Python 2; both entries are ``str`` on
    # Python 3.
    text_types = (str, type(u''))

    # For all time variables with bounds
    for v in variables.values():
        attrs = v.attrs

        # Attributes are not guaranteed to be strings: a numeric ``units``
        # would make ``'since' in units`` raise TypeError and abort the
        # decoding of the whole dataset.
        units = attrs.get('units')
        if not (isinstance(units, text_types) and 'since' in units):
            continue
        if 'bounds' not in attrs:
            continue

        try:
            bounds_var = variables[attrs['bounds']]
        except (KeyError, TypeError):
            # KeyError: the referenced bounds variable is not available.
            # TypeError: the attribute value is unhashable (e.g. a list)
            # and therefore cannot name a variable.
            continue

        bounds_attrs = bounds_var.attrs
        bounds_attrs.setdefault('units', units)
        if 'calendar' in attrs:
            bounds_attrs.setdefault('calendar', attrs['calendar'])
def test_decode_cf_time_bounds():
    """Check that time bounds get their decoding attributes from ``time``.

    Three scenarios are exercised on a small ``time_bnds`` array:
    attributes are copied and decoding honours ``decode_times``; attributes
    already set on the bounds are kept; a ``bounds`` attribute naming a
    variable that does not exist is silently ignored.
    """
    time_attrs = {'units': 'days since 2001-01',
                  'calendar': 'standard',
                  'bounds': 'time_bnds'}
    raw_bounds = DataArray(np.arange(6, dtype='int64').reshape((3, 2)),
                           coords={'time': [1, 2, 3]},
                           dims=('time', 'nbnd'), name='time_bnds')

    def fresh_dataset():
        # Every scenario starts from a new dataset whose time coordinate
        # carries the CF time attributes.
        dataset = raw_bounds.to_dataset()
        dataset['time'].attrs.update(time_attrs)
        return dataset

    # Attributes are propagated, then used (or not) for decoding
    dataset = fresh_dataset()
    _update_bounds_attributes(dataset.variables)
    expected = {'units': 'days since 2001-01',
                'calendar': 'standard'}
    assert dataset.variables['time_bnds'].attrs == expected
    decoded = decode_cf(dataset)
    assert decoded.time_bnds.dtype == np.dtype('M8[ns]')
    undecoded = decode_cf(dataset, decode_times=False)
    assert undecoded.time_bnds.dtype == np.dtype('int64')

    # Do not overwrite existing attrs
    dataset = fresh_dataset()
    preset = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
    dataset['time_bnds'].attrs.update(preset)
    _update_bounds_attributes(dataset.variables)
    assert dataset.variables['time_bnds'].attrs == preset

    # If bounds variable not available do not complain
    dataset = fresh_dataset()
    dataset['time'].attrs['bounds'] = 'fake_var'
    _update_bounds_attributes(dataset.variables)


@pytest.fixture(params=_ALL_CALENDARS)
def calendar(request):
    """Parametrized fixture yielding each entry of ``_ALL_CALENDARS``."""
    return request.param