Skip to content

Commit

Permalink
Harmonize FillValue and missing_value during encoding and decodin…
Browse files Browse the repository at this point in the history
…g steps (#3502)

* Replace `equivalent()` with `allclose_or_equiv()`

* Ensure _FillValue & missing_value are cast to same dtype as data's

* Use Numpy scalar during type casting

* Update ValueError message

* Formatting only

* Update whats-new.rst
  • Loading branch information
andersy005 authored and max-sixty committed Nov 14, 2019
1 parent 810345c commit eece079
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 4 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ New Features

Bug fixes
~~~~~~~~~
- Harmonize `_FillValue` and `missing_value` during encoding and decoding steps. (:pull:`3502`)
By `Anderson Banihirwe <https://github.com/andersy005>`_.
- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle <https://github.com/rdoyle45>`_
- Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`).
Expand Down
14 changes: 10 additions & 4 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from ..core import dtypes, duck_array_ops, indexing
from ..core.pycompat import dask_array_type
from ..core.utils import equivalent
from ..core.variable import Variable


Expand Down Expand Up @@ -152,18 +151,25 @@ def encode(self, variable, name=None):
fv = encoding.get("_FillValue")
mv = encoding.get("missing_value")

if fv is not None and mv is not None and not equivalent(fv, mv):
if (
fv is not None
and mv is not None
and not duck_array_ops.allclose_or_equiv(fv, mv)
):
raise ValueError(
"Variable {!r} has multiple fill values {}. "
"Cannot encode data. ".format(name, [fv, mv])
f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data."
)

if fv is not None:
# Ensure _FillValue is cast to same dtype as data's
encoding["_FillValue"] = data.dtype.type(fv)
fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
if not pd.isnull(fill_value):
data = duck_array_ops.fillna(data, fill_value)

if mv is not None:
# Ensure missing_value is cast to same dtype as data's
encoding["missing_value"] = data.dtype.type(mv)
fill_value = pop_to(encoding, attrs, "missing_value", name=name)
if not pd.isnull(fill_value) and fv is None:
data = duck_array_ops.fillna(data, fill_value)
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode():
assert_identical(expected, encoded)


def test_CFMaskCoder_encode_missing_fill_values_conflict():
    """Check encoding when _FillValue and missing_value differ only in dtype.

    The variable carries ``_FillValue`` as ``np.float32(1e20)`` and
    ``missing_value`` as ``np.float64(1e20)``.  After encoding, both
    attributes must share the dtype of the encoded data, and an
    encode/decode round trip (which is expected to emit a
    ``SerializationWarning``) must reproduce the original variable.
    """
    fill_encoding = {
        "_FillValue": np.float32(1e20),
        "missing_value": np.float64(1e20),
    }
    original = xr.Variable(("x",), [0.0, -1.0, 1.0], encoding=fill_encoding)
    coder = variables.CFMaskCoder()
    encoded = coder.encode(original)

    # Both fill attributes must have been cast to the dtype of the data.
    for attr_name in ("missing_value", "_FillValue"):
        assert encoded.dtype == encoded.attrs[attr_name].dtype

    with pytest.warns(variables.SerializationWarning):
        roundtripped = coder.decode(coder.encode(original))
        assert_identical(roundtripped, original)


def test_CFMaskCoder_missing_value():
expected = xr.DataArray(
np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]),
Expand Down

0 comments on commit eece079

Please sign in to comment.