From eece07932d5498a8abef6a8fbd30d00066931b18 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 13 Nov 2019 18:22:50 -0700 Subject: [PATCH] Harmonize `FillValue` and `missing_value` during encoding and decoding steps (#3502) * Replace `equivalent()` with `allclose_or_equiv()` * Ensure _FillValue & missing_value are cast to same dtype as data's * Use Numpy scalar during type casting * Update ValueError message * Formatting only * Update whats-new.rst --- doc/whats-new.rst | 2 ++ xarray/coding/variables.py | 14 ++++++++++---- xarray/tests/test_coding.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ea3b012cc98..f840557ab5d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,8 @@ New Features Bug fixes ~~~~~~~~~ +- Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,25 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." ) if fv is not None: + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]),