Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CF: also decode time bounds when available #2571

Merged
merged 7 commits into from
Dec 19, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ Enhancements
- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
:py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
By `Stephan Hoyer <https://github.com/shoyer>`_
- Time bounds variables are now also decoded according to CF conventions
(:issue:`2565`).
By `Fabien Maussion <https://github.com/fmaussion>`_.

Bug fixes
~~~~~~~~~
Expand Down
34 changes: 33 additions & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
return Variable(dimensions, data, attributes, encoding=encoding)


def _update_bounds_attributes(variables):
    """Adds time attributes to time bounds variables.

    Variables handling time bounds ("Cell boundaries" in the CF
    conventions) do not necessarily carry the necessary attributes to be
    decoded. This copies the attributes from the time variable to the
    associated boundaries.

    Parameters
    ----------
    variables : mapping
        Maps variable names to objects exposing a mutable ``attrs``
        mapping. The ``attrs`` of the referenced bounds variables are
        modified in place; attributes already present on a bounds
        variable are never overwritten.

    Returns
    -------
    None

    See Also:

    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
         cf-conventions.html#cell-boundaries

    https://github.com/pydata/xarray/issues/2565
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    string_types = (str, type(u''))

    # For all time variables with bounds
    for v in variables.values():
        attrs = v.attrs
        units = attrs.get('units')
        # Non-string units (e.g. a number) cannot describe a time axis and
        # must be skipped: ``'since' in 1`` would raise a TypeError.
        has_date_units = isinstance(units, string_types) and 'since' in units
        if not (has_date_units and 'bounds' in attrs):
            continue

        # A dangling reference to a missing bounds variable is tolerated.
        if attrs['bounds'] not in variables:
            continue

        bounds_attrs = variables[attrs['bounds']].attrs
        # setdefault keeps any attribute the bounds variable already has.
        bounds_attrs.setdefault('units', units)
        if 'calendar' in attrs:
            bounds_attrs.setdefault('calendar', attrs['calendar'])


def decode_cf_variables(variables, attributes, concat_characters=True,
mask_and_scale=True, decode_times=True,
decode_coords=True, drop_variables=None):
"""
Decode a several CF encoded variables.
Decode several CF encoded variables.

See: decode_cf_variable
"""
Expand All @@ -350,6 +378,10 @@ def stackable(dim):
drop_variables = []
drop_variables = set(drop_variables)

# Time bounds coordinates might miss the decoding attributes
if decode_times:
_update_bounds_attributes(variables)

new_vars = OrderedDict()
for k, v in iteritems(variables):
if k in drop_variables:
Expand Down
36 changes: 36 additions & 0 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from xarray import DataArray, Variable, coding, decode_cf
from xarray.coding.times import (_import_cftime, cftime_to_nptime,
decode_cf_datetime, encode_cf_datetime)
from xarray.conventions import _update_bounds_attributes
from xarray.core.common import contains_cftime_datetimes

from . import (
Expand Down Expand Up @@ -624,6 +625,41 @@ def test_decode_cf(calendar):
assert ds.test.dtype == np.dtype('M8[ns]')


def test_decode_cf_time_bounds():
    """Check that time bounds take their decoding attributes from ``time``."""
    raw_values = np.arange(6, dtype='int64').reshape((3, 2))
    da = DataArray(raw_values,
                   coords={'time': [1, 2, 3]},
                   dims=('time', 'nbnd'), name='time_bnds')

    attrs = {'units': 'days since 2001-01',
             'calendar': 'standard',
             'bounds': 'time_bnds'}

    def fresh_dataset():
        # Every scenario starts from a new dataset whose time coordinate
        # carries the time attributes defined above.
        dataset = da.to_dataset()
        dataset['time'].attrs.update(attrs)
        return dataset

    # Missing attributes are copied from the time variable
    ds = fresh_dataset()
    _update_bounds_attributes(ds.variables)
    expected = {'units': 'days since 2001-01',
                'calendar': 'standard'}
    assert ds.variables['time_bnds'].attrs == expected
    decoded = decode_cf(ds)
    assert decoded.time_bnds.dtype == np.dtype('M8[ns]')
    undecoded = decode_cf(ds, decode_times=False)
    assert undecoded.time_bnds.dtype == np.dtype('int64')

    # Do not overwrite existing attrs
    ds = fresh_dataset()
    bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
    ds['time_bnds'].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = fresh_dataset()
    ds['time'].attrs['bounds'] = 'fake_var'
    _update_bounds_attributes(ds.variables)


@pytest.fixture(params=_ALL_CALENDARS)
def calendar(request):
    """Parametrized fixture returning one entry of ``_ALL_CALENDARS``."""
    selected = request.param
    return selected
Expand Down