diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b28beb9e3b2..eb7d33c94e3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,6 +40,10 @@ Enhancements - Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` object (:issue:`1993`) By `Keisuke Fujii `_. + - Handle variables with different values for ``missing_value`` and + ``_FillValue`` by masking values for both attributes; previously this + resulted in a ``ValueError``. (:issue:`2016`) + By `Ryan May `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index ced535643a5..4e61e7d4722 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from ..core import dtypes, duck_array_ops, indexing, utils +from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type from ..core.variable import Variable @@ -152,26 +152,12 @@ def encode(self, variable, name=None): def decode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_decoding(variable) - if 'missing_value' in attrs: - # missing_value is deprecated, but we still want to support it as - # an alias for _FillValue. - if ('_FillValue' in attrs and - not utils.equivalent(attrs['_FillValue'], - attrs['missing_value'])): - raise ValueError("Conflicting _FillValue and missing_value " - "attrs on a variable {!r}: {} vs. {}\n\n" - "Consider opening the offending dataset " - "using decode_cf=False, correcting the " - "attrs and decoding explicitly using " - "xarray.decode_cf()." - .format(name, attrs['_FillValue'], - attrs['missing_value'])) - attrs['_FillValue'] = attrs.pop('missing_value') - - if '_FillValue' in attrs: - raw_fill_value = pop_to(attrs, encoding, '_FillValue', name=name) - encoded_fill_values = [ - fv for fv in np.ravel(raw_fill_value) if not pd.isnull(fv)] + raw_fill_values = [pop_to(attrs, encoding, attr, name=name) + for attr in ('missing_value', '_FillValue')] + if raw_fill_values: + encoded_fill_values = {fv for option in raw_fill_values + for fv in np.ravel(option) + if not pd.isnull(fv)} if len(encoded_fill_values) > 1: warnings.warn("variable {!r} has multiple fill values {}, " diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 7028bac7057..5b82f5a26f2 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -156,12 +156,15 @@ def test(self): def test_decode_cf_with_conflicting_fill_missing_value(): - var = Variable(['t'], np.arange(10), + expected = Variable(['t'], [np.nan, np.nan, 2], {'units': 'foobar'}) + var = Variable(['t'], np.arange(3), {'units': 'foobar', 'missing_value': 0, '_FillValue': 1}) - with raises_regex(ValueError, "_FillValue and missing_value"): - conventions.decode_cf_variable('t', var) + with warnings.catch_warnings(record=True) as w: + actual = conventions.decode_cf_variable('t', var) + assert_identical(actual, expected) + assert 'has multiple fill' in str(w[0].message) expected = Variable(['t'], np.arange(10), {'units': 'foobar'})