From 477cd5876ccf8d3afbf3bf6317aa1ec5881a016c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Tue, 6 Feb 2024 09:46:33 +0100
Subject: [PATCH 01/15] correctly encode/decode _FillValues

---
 xarray/coding/variables.py    | 77 ++++++++++++++++++++++++++++-------
 xarray/tests/test_backends.py | 72 +++++++++++++++++---------------
 2 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index c3d57ad1903..120bff0b594 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -280,8 +280,10 @@ def encode(self, variable: Variable, name: T_Name = None):
                     data = duck_array_ops.fillna(data, fill_value)
 
         if mv_exists:
+            # Use _FillValue if available to align missing_value to prevent issues
+            # when decoding
             # Ensure missing_value is cast to same dtype as data's
-            encoding["missing_value"] = dtype.type(mv)
+            encoding["missing_value"] = attrs.get("_FillValue", dtype.type(mv))
             fill_value = pop_to(encoding, attrs, "missing_value", name=name)
             if not pd.isnull(fill_value) and not fv_exists:
                 if is_time_like and data.dtype.kind in "iu":
@@ -322,7 +324,13 @@ def decode(self, variable: Variable, name: T_Name = None):
             if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
                 dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
             else:
-                dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
+                if "scale_factor" not in attrs and "add_offset" not in attrs:
+                    dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
+                else:
+                    dtype, decoded_fill_value = (
+                        _choose_float_dtype(data.dtype, attrs),
+                        np.nan,
+                    )
 
             if encoded_fill_values:
                 transform = partial(
@@ -347,23 +355,51 @@ def _scale_offset_decoding(data, scale_factor, add_offset, dtype: np.typing.DTyp
     return data
 
 
-def _choose_float_dtype(dtype: np.dtype, has_offset: bool) -> type[np.floating[Any]]:
+def _choose_float_dtype(
+    dtype: np.dtype, mapping: MutableMapping
+) -> type[np.floating[Any]]:
     """Return a float dtype that can losslessly represent `dtype` values."""
-    # Keep float32 as-is.  Upcast half-precision to single-precision,
+    # check scale/offset first to derive wanted float dtype
+    # see https://github.com/pydata/xarray/issues/5597#issuecomment-879561954
+    scale_factor = mapping.get("scale_factor")
+    add_offset = mapping.get("add_offset")
+    if scale_factor is not None or add_offset is not None:
+        # get the type from scale_factor/add_offset to determine
+        # the needed floating point type
+        if scale_factor is not None:
+            scale_type = type(scale_factor)
+        if add_offset is not None:
+            offset_type = type(add_offset)
+        # CF conforming, both scale_factor and add-offset are given and
+        # of same floating point type (float32/64)
+        if (
+            add_offset is not None
+            and scale_factor is not None
+            and offset_type == scale_type
+            and scale_type in [np.float32, np.float64]
+        ):
+            # in case of int32 -> we need upcast to float64
+            # due to precision issues
+            if dtype.itemsize == 4 and np.issubdtype(dtype, np.integer):
+                return np.float64
+            return np.dtype(scale_type).type
+        # Not CF conforming and add_offset given:
+        # A scale factor is entirely safe (vanishing into the mantissa),
+        # but a large integer offset could lead to loss of precision.
+        # Sensitivity analysis can be tricky, so we just use a float64
+        # if there's any offset at all - better unoptimised than wrong!
+        if add_offset is not None:
+            return np.float64
+        # return float32 in other cases where only scale_factor is given
+        return np.float32
+    # If no scale_factor or add_offset is given, use some general rules.
+    # Keep float32 as-is. Upcast half-precision to single-precision,
     # because float16 is "intended for storage but not computation"
     if dtype.itemsize <= 4 and np.issubdtype(dtype, np.floating):
         return np.float32
     # float32 can exactly represent all integers up to 24 bits
     if dtype.itemsize <= 2 and np.issubdtype(dtype, np.integer):
-        # A scale factor is entirely safe (vanishing into the mantissa),
-        # but a large integer offset could lead to loss of precision.
-        # Sensitivity analysis can be tricky, so we just use a float64
-        # if there's any offset at all - better unoptimised than wrong!
-        if not has_offset:
-            return np.float32
-    # For all other types and circumstances, we just use float64.
-    # (safe because eg. complex numbers are not supported in NetCDF)
-    return np.float64
+        return np.float32
 
 
 class CFScaleOffsetCoder(VariableCoder):
@@ -377,7 +413,12 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
         dims, data, attrs, encoding = unpack_for_encoding(variable)
 
         if "scale_factor" in encoding or "add_offset" in encoding:
-            dtype = _choose_float_dtype(data.dtype, "add_offset" in encoding)
+            # if we have a _FillValue/masked_value we do not want to cast now
+            # but leave that to CFMaskCoder
+            dtype = data.dtype
+            if "_FillValue" not in encoding and "missing_value" not in encoding:
+                dtype = _choose_float_dtype(data.dtype, encoding)
+            # but still we need a copy prevent changing original data
             data = duck_array_ops.astype(data, dtype=dtype, copy=True)
         if "add_offset" in encoding:
             data -= pop_to(encoding, attrs, "add_offset", name=name)
@@ -393,11 +434,17 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
 
             scale_factor = pop_to(attrs, encoding, "scale_factor", name=name)
             add_offset = pop_to(attrs, encoding, "add_offset", name=name)
-            dtype = _choose_float_dtype(data.dtype, "add_offset" in encoding)
             if np.ndim(scale_factor) > 0:
                 scale_factor = np.asarray(scale_factor).item()
             if np.ndim(add_offset) > 0:
                 add_offset = np.asarray(add_offset).item()
+            # if we have a _FillValue/masked_value we already have the wanted
+            # floating point dtype here (via CFMaskCoder), so no check is necessary
+            # only check in other cases
+            dtype = data.dtype
+            if "_FillValue" not in encoding and "missing_value" not in encoding:
+                dtype = _choose_float_dtype(dtype, encoding)
+
             transform = partial(
                 _scale_offset_decoding,
                 scale_factor=scale_factor,
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 0eaa7ee7361..5e2ae8e4804 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -142,96 +142,100 @@ def open_example_mfdataset(names, *args, **kwargs) -> Dataset:
     )
 
 
-def create_masked_and_scaled_data() -> Dataset:
-    x = np.array([np.nan, np.nan, 10, 10.1, 10.2], dtype=np.float32)
+def create_masked_and_scaled_data(dtype: np.dtype) -> Dataset:
+    x = np.array([np.nan, np.nan, 10, 10.1, 10.2], dtype=dtype)
     encoding = {
         "_FillValue": -1,
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
         "dtype": "i2",
     }
     return Dataset({"x": ("t", x, {}, encoding)})
 
 
-def create_encoded_masked_and_scaled_data() -> Dataset:
-    attributes = {"_FillValue": -1, "add_offset": 10, "scale_factor": np.float32(0.1)}
+def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset:
+    attributes = {
+        "_FillValue": -1,
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
+    }
     return Dataset(
         {"x": ("t", np.array([-1, -1, 0, 1, 2], dtype=np.int16), attributes)}
     )
 
 
-def create_unsigned_masked_scaled_data() -> Dataset:
+def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset:
     encoding = {
         "_FillValue": 255,
         "_Unsigned": "true",
         "dtype": "i1",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
     }
-    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=np.float32)
+    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=dtype)
     return Dataset({"x": ("t", x, {}, encoding)})
 
 
-def create_encoded_unsigned_masked_scaled_data() -> Dataset:
+def create_encoded_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset:
     # These are values as written to the file: the _FillValue will
     # be represented in the signed form.
     attributes = {
         "_FillValue": -1,
         "_Unsigned": "true",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
     }
     # Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned
     sb = np.asarray([0, 1, 127, -128, -1], dtype="i1")
     return Dataset({"x": ("t", sb, attributes)})
 
 
-def create_bad_unsigned_masked_scaled_data() -> Dataset:
+def create_bad_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset:
     encoding = {
         "_FillValue": 255,
         "_Unsigned": True,
         "dtype": "i1",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
     }
-    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=np.float32)
+    x = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=dtype)
     return Dataset({"x": ("t", x, {}, encoding)})
 
 
-def create_bad_encoded_unsigned_masked_scaled_data() -> Dataset:
+def create_bad_encoded_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset:
     # These are values as written to the file: the _FillValue will
     # be represented in the signed form.
     attributes = {
         "_FillValue": -1,
         "_Unsigned": True,
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
     }
     # Create signed data corresponding to [0, 1, 127, 128, 255] unsigned
     sb = np.asarray([0, 1, 127, -128, -1], dtype="i1")
     return Dataset({"x": ("t", sb, attributes)})
 
 
-def create_signed_masked_scaled_data() -> Dataset:
+def create_signed_masked_scaled_data(dtype: np.dtype) -> Dataset:
     encoding = {
         "_FillValue": -127,
         "_Unsigned": "false",
         "dtype": "i1",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
     }
-    x = np.array([-1.0, 10.1, 22.7, np.nan], dtype=np.float32)
+    x = np.array([-1.0, 10.1, 22.7, np.nan], dtype=dtype)
     return Dataset({"x": ("t", x, {}, encoding)})
 
 
-def create_encoded_signed_masked_scaled_data() -> Dataset:
+def create_encoded_signed_masked_scaled_data(dtype: np.dtype) -> Dataset:
     # These are values as written to the file: the _FillValue will
     # be represented in the signed form.
     attributes = {
         "_FillValue": -127,
         "_Unsigned": "false",
-        "add_offset": 10,
-        "scale_factor": np.float32(0.1),
+        "add_offset": dtype.type(10),
+        "scale_factor": dtype.type(0.1),
     }
     # Create signed data corresponding to [0, 1, 127, 128, 255] unsigned
     sb = np.asarray([-110, 1, 127, -127], dtype="i1")
@@ -889,9 +893,10 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
             (create_masked_and_scaled_data, create_encoded_masked_and_scaled_data),
         ],
     )
-    def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn) -> None:
-        decoded = decoded_fn()
-        encoded = encoded_fn()
+    @pytest.mark.parametrize("dtype", [np.dtype("float64"), np.dtype("float32")])
+    def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None:
+        decoded = decoded_fn(dtype)
+        encoded = encoded_fn(dtype)
 
         with self.roundtrip(decoded) as actual:
             for k in decoded.variables:
@@ -912,7 +917,7 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn) -> None:
 
         # make sure roundtrip encoding didn't change the
         # original dataset.
-        assert_allclose(encoded, encoded_fn(), decode_bytes=False)
+        assert_allclose(encoded, encoded_fn(dtype), decode_bytes=False)
 
         with self.roundtrip(encoded) as actual:
             for k in decoded.variables:
@@ -1645,6 +1650,7 @@ def test_mask_and_scale(self) -> None:
                 v.add_offset = 10
                 v.scale_factor = 0.1
                 v[:] = np.array([-1, -1, 0, 1, 2])
+                dtype = type(v.scale_factor)
 
             # first make sure netCDF4 reads the masked and scaled data
             # correctly
@@ -1653,11 +1659,13 @@ def test_mask_and_scale(self) -> None:
                     [-1, -1, 10, 10.1, 10.2], mask=[True, True, False, False, False]
                 )
                 actual = nc.variables["x"][:]
+                print(actual.dtype, expected.dtype)
                 assert_array_equal(expected, actual)
 
             # now check xarray
             with open_dataset(tmp_file) as ds:
-                expected = create_masked_and_scaled_data()
+                expected = create_masked_and_scaled_data(np.dtype(dtype))
+                print(ds.variables["x"].dtype, expected.variables["x"].dtype)
                 assert_identical(expected, ds)
 
     def test_0dimensional_variable(self) -> None:

From 792e9429c56cf55ed5cebfd05f50db14ea5bbb30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Tue, 6 Feb 2024 10:05:43 +0100
Subject: [PATCH 02/15] fix mypy

---
 xarray/coding/variables.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 120bff0b594..4ea7cb8c0e6 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -400,6 +400,9 @@ def _choose_float_dtype(
     # float32 can exactly represent all integers up to 24 bits
     if dtype.itemsize <= 2 and np.issubdtype(dtype, np.integer):
         return np.float32
+    # For all other types and circumstances, we just use float64.
+    # (safe because eg. complex numbers are not supported in NetCDF)
+    return np.float64
 
 
 class CFScaleOffsetCoder(VariableCoder):

From f743fd1df3c92c47c6eecd5a9e1a2d6ebd3b14ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Tue, 6 Feb 2024 11:14:27 +0100
Subject: [PATCH 03/15] fix CFMaskCode test

---
 xarray/tests/test_coding.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py
index f7579c4b488..dce65dbe11c 100644
--- a/xarray/tests/test_coding.py
+++ b/xarray/tests/test_coding.py
@@ -51,9 +51,8 @@ def test_CFMaskCoder_encode_missing_fill_values_conflict(data, encoding) -> None
     assert encoded.dtype == encoded.attrs["missing_value"].dtype
     assert encoded.dtype == encoded.attrs["_FillValue"].dtype
 
-    with pytest.warns(variables.SerializationWarning):
-        roundtripped = decode_cf_variable("foo", encoded)
-        assert_identical(roundtripped, original)
+    roundtripped = decode_cf_variable("foo", encoded)
+    assert_identical(roundtripped, original)
 
 
 def test_CFMaskCoder_missing_value() -> None:

From 551719a2cde48477a84a695c45b703eb2327dfa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Tue, 6 Feb 2024 12:57:31 +0100
Subject: [PATCH 04/15] fix scale/offset

---
 xarray/coding/variables.py  |  4 ++--
 xarray/tests/test_coding.py | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 4ea7cb8c0e6..7c1ad25b778 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -390,8 +390,8 @@ def _choose_float_dtype(
         # if there's any offset at all - better unoptimised than wrong!
         if add_offset is not None:
             return np.float64
-        # return float32 in other cases where only scale_factor is given
-        return np.float32
+        # return dtype depending on given scale_factor
+        return np.dtype(scale_type).type
     # If no scale_factor or add_offset is given, use some general rules.
     # Keep float32 as-is. Upcast half-precision to single-precision,
     # because float16 is "intended for storage but not computation"
diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py
index dce65dbe11c..6d81d6f5dc8 100644
--- a/xarray/tests/test_coding.py
+++ b/xarray/tests/test_coding.py
@@ -95,16 +95,18 @@ def test_coder_roundtrip() -> None:
 
 
 @pytest.mark.parametrize("dtype", "u1 u2 i1 i2 f2 f4".split())
-def test_scaling_converts_to_float32(dtype) -> None:
+@pytest.mark.parametrize("dtype2", "f4 f8".split())
+def test_scaling_converts_to_float(dtype: str, dtype2: str) -> None:
+    dt = np.dtype(dtype2)
     original = xr.Variable(
-        ("x",), np.arange(10, dtype=dtype), encoding=dict(scale_factor=10)
+        ("x",), np.arange(10, dtype=dtype), encoding=dict(scale_factor=dt.type(10))
     )
     coder = variables.CFScaleOffsetCoder()
     encoded = coder.encode(original)
-    assert encoded.dtype == np.float32
+    assert encoded.dtype == dt
     roundtripped = coder.decode(encoded)
     assert_identical(original, roundtripped)
-    assert roundtripped.dtype == np.float32
+    assert roundtripped.dtype == dt
 
 
 @pytest.mark.parametrize("scale_factor", (10, [10]))

From 85730e2e618d1e563a20796cb6fc3d9b2f9427fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Tue, 6 Feb 2024 13:49:31 +0100
Subject: [PATCH 05/15] avert zarr issue

---
 xarray/tests/test_backends.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5e2ae8e4804..e5346d5ed5a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -895,9 +895,10 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
     )
     @pytest.mark.parametrize("dtype", [np.dtype("float64"), np.dtype("float32")])
     def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None:
+        if hasattr(self, "zarr_version") and dtype == np.float32:
+            pytest.skip("float32 will be treated as float64 in zarr")
         decoded = decoded_fn(dtype)
         encoded = encoded_fn(dtype)
-
         with self.roundtrip(decoded) as actual:
             for k in decoded.variables:
                 assert decoded.variables[k].dtype == actual.variables[k].dtype

From 5fe874fc1791184f8ac602d7e2eef251cc957fc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Tue, 6 Feb 2024 15:47:03 +0100
Subject: [PATCH 06/15] add whats-new.rst entry

---
 doc/whats-new.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 1b0f2f18efb..489d8d29bcd 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -66,6 +66,10 @@ Bug fixes
   lead to integer overflow or unsafe conversion from floating point to integer
   values (:issue:`8542`, :pull:`8575`).  By `Spencer Clark
   <https://github.com/spencerkclark>`_.
+- CF conform handling of `_FillValue`/`missing_value` and `dtype` in
+  `CFMaskCoder`/`CFScaleOffsetCoder` (:issue:`2304`, :issue:`5597`,
+  :issue:`7691`, :pull:`8713`, see also discussion in :pull:`7654`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Documentation
 ~~~~~~~~~~~~~

From f8488728dba64f2e0aada8842a904b7d618c99b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Wed, 7 Feb 2024 12:23:34 +0100
Subject: [PATCH 07/15] refactor fillvalue/missing value check to catch
 non-conforming values, apply review suggestions

---
 xarray/coding/variables.py    | 133 +++++++++++++++++++---------------
 xarray/tests/test_backends.py |   2 -
 2 files changed, 76 insertions(+), 59 deletions(-)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 7c1ad25b778..136224f8a17 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -243,6 +243,44 @@ def _is_time_like(units):
         return any(tstr == units for tstr in time_strings)
 
 
+def _check_fill_values(attrs, name, dtype):
+    """ "Check _FillValue and missing_value if available.
+
+    Return dictionary with raw fill values and set with encoded fill values.
+
+    Issue SerializationWarning if appropriate.
+    """
+    raw_fill_dict = {}
+    [
+        pop_to(attrs, raw_fill_dict, attr, name=name)
+        for attr in ("missing_value", "_FillValue")
+    ]
+    encoded_fill_values = set()
+    for k in list(raw_fill_dict):
+        v = raw_fill_dict[k]
+        kfill = {fv for fv in np.ravel(v) if not pd.isnull(fv)}
+        if not kfill and np.issubdtype(dtype, np.integer):
+            warnings.warn(
+                f"variable {name!r} has non-conforming {k!r} "
+                f"{v!r} defined, dropping {k!r} entirely.",
+                SerializationWarning,
+                stacklevel=3,
+            )
+            del raw_fill_dict[k]
+        else:
+            encoded_fill_values |= kfill
+
+        if len(encoded_fill_values) > 1:
+            warnings.warn(
+                f"variable {name!r} has multiple fill values "
+                f"{encoded_fill_values} defined, decoding all values to NaN.",
+                SerializationWarning,
+                stacklevel=3,
+            )
+
+    return raw_fill_dict, encoded_fill_values
+
+
 class CFMaskCoder(VariableCoder):
     """Mask or unmask fill values according to CF conventions."""
 
@@ -264,75 +302,56 @@ def encode(self, variable: Variable, name: T_Name = None):
                 f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data."
             )
 
-        # special case DateTime to properly handle NaT
-        is_time_like = _is_time_like(attrs.get("units"))
-
         if fv_exists:
             # Ensure _FillValue is cast to same dtype as data's
             encoding["_FillValue"] = dtype.type(fv)
             fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
-            if not pd.isnull(fill_value):
-                if is_time_like and data.dtype.kind in "iu":
-                    data = duck_array_ops.where(
-                        data != np.iinfo(np.int64).min, data, fill_value
-                    )
-                else:
-                    data = duck_array_ops.fillna(data, fill_value)
 
         if mv_exists:
-            # Use _FillValue if available to align missing_value to prevent issues
-            # when decoding
-            # Ensure missing_value is cast to same dtype as data's
+            # try to use _FillValue, if it exists to align both values
+            # or use missing_value and ensure it's cast to same dtype as data's
             encoding["missing_value"] = attrs.get("_FillValue", dtype.type(mv))
             fill_value = pop_to(encoding, attrs, "missing_value", name=name)
-            if not pd.isnull(fill_value) and not fv_exists:
-                if is_time_like and data.dtype.kind in "iu":
-                    data = duck_array_ops.where(
-                        data != np.iinfo(np.int64).min, data, fill_value
-                    )
-                else:
-                    data = duck_array_ops.fillna(data, fill_value)
+
+        # apply fillna
+        if not pd.isnull(fill_value):
+            # special case DateTime to properly handle NaT
+            if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
+                data = duck_array_ops.where(
+                    data != np.iinfo(np.int64).min, data, fill_value
+                )
+            else:
+                data = duck_array_ops.fillna(data, fill_value)
 
         return Variable(dims, data, attrs, encoding, fastpath=True)
 
     def decode(self, variable: Variable, name: T_Name = None):
-        dims, data, attrs, encoding = unpack_for_decoding(variable)
+        raw_fill_dict, encoded_fill_values = _check_fill_values(
+            variable.attrs, name, variable.dtype
+        )
 
-        raw_fill_values = [
-            pop_to(attrs, encoding, attr, name=name)
-            for attr in ("missing_value", "_FillValue")
-        ]
-        if raw_fill_values:
-            encoded_fill_values = {
-                fv
-                for option in raw_fill_values
-                for fv in np.ravel(option)
-                if not pd.isnull(fv)
-            }
-
-            if len(encoded_fill_values) > 1:
-                warnings.warn(
-                    "variable {!r} has multiple fill values {}, "
-                    "decoding all values to NaN.".format(name, encoded_fill_values),
-                    SerializationWarning,
-                    stacklevel=3,
-                )
+        if raw_fill_dict:
+            dims, data, attrs, encoding = unpack_for_decoding(variable)
+            [
+                safe_setitem(encoding, attr, value, name=name)
+                for attr, value in raw_fill_dict.items()
+            ]
 
-            # special case DateTime to properly handle NaT
-            dtype: np.typing.DTypeLike
-            decoded_fill_value: Any
-            if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
-                dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
-            else:
-                if "scale_factor" not in attrs and "add_offset" not in attrs:
-                    dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
+            if encoded_fill_values:
+                # special case DateTime to properly handle NaT
+                dtype: np.typing.DTypeLike
+                decoded_fill_value: Any
+                if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
+                    dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
                 else:
-                    dtype, decoded_fill_value = (
-                        _choose_float_dtype(data.dtype, attrs),
-                        np.nan,
-                    )
+                    if "scale_factor" not in attrs and "add_offset" not in attrs:
+                        dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
+                    else:
+                        dtype, decoded_fill_value = (
+                            _choose_float_dtype(data.dtype, attrs),
+                            np.nan,
+                        )
 
-            if encoded_fill_values:
                 transform = partial(
                     _apply_mask,
                     encoded_fill_values=encoded_fill_values,
@@ -367,9 +386,9 @@ def _choose_float_dtype(
         # get the type from scale_factor/add_offset to determine
         # the needed floating point type
         if scale_factor is not None:
-            scale_type = type(scale_factor)
+            scale_type = np.dtype(type(scale_factor))
         if add_offset is not None:
-            offset_type = type(add_offset)
+            offset_type = np.dtype(type(add_offset))
         # CF conforming, both scale_factor and add-offset are given and
         # of same floating point type (float32/64)
         if (
@@ -382,7 +401,7 @@ def _choose_float_dtype(
             # due to precision issues
             if dtype.itemsize == 4 and np.issubdtype(dtype, np.integer):
                 return np.float64
-            return np.dtype(scale_type).type
+            return scale_type
         # Not CF conforming and add_offset given:
         # A scale factor is entirely safe (vanishing into the mantissa),
         # but a large integer offset could lead to loss of precision.
@@ -391,7 +410,7 @@ def _choose_float_dtype(
         if add_offset is not None:
             return np.float64
         # return dtype depending on given scale_factor
-        return np.dtype(scale_type).type
+        return scale_type
     # If no scale_factor or add_offset is given, use some general rules.
     # Keep float32 as-is. Upcast half-precision to single-precision,
     # because float16 is "intended for storage but not computation"
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index e5346d5ed5a..2251821d82a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1660,13 +1660,11 @@ def test_mask_and_scale(self) -> None:
                     [-1, -1, 10, 10.1, 10.2], mask=[True, True, False, False, False]
                 )
                 actual = nc.variables["x"][:]
-                print(actual.dtype, expected.dtype)
                 assert_array_equal(expected, actual)
 
             # now check xarray
             with open_dataset(tmp_file) as ds:
                 expected = create_masked_and_scaled_data(np.dtype(dtype))
-                print(ds.variables["x"].dtype, expected.variables["x"].dtype)
                 assert_identical(expected, ds)
 
     def test_0dimensional_variable(self) -> None:

From 672e84e404d6242f119d39d60dd56f082ac2b5b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Wed, 7 Feb 2024 12:32:52 +0100
Subject: [PATCH 08/15] fix typing

---
 xarray/coding/variables.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 136224f8a17..8bd08db92f0 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -401,7 +401,7 @@ def _choose_float_dtype(
             # due to precision issues
             if dtype.itemsize == 4 and np.issubdtype(dtype, np.integer):
                 return np.float64
-            return scale_type
+            return scale_type.type
         # Not CF conforming and add_offset given:
         # A scale factor is entirely safe (vanishing into the mantissa),
         # but a large integer offset could lead to loss of precision.
@@ -410,7 +410,7 @@ def _choose_float_dtype(
         if add_offset is not None:
             return np.float64
         # return dtype depending on given scale_factor
-        return scale_type
+        return scale_type.type
     # If no scale_factor or add_offset is given, use some general rules.
     # Keep float32 as-is. Upcast half-precision to single-precision,
     # because float16 is "intended for storage but not computation"

From bff3a5d2db19413bef7ba4f817e95d275b074458 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Wed, 7 Feb 2024 12:52:06 +0100
Subject: [PATCH 09/15] suppress warning in doc

---
 doc/user-guide/indexing.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst
index 90b7cbaf2a9..fba9dd585ab 100644
--- a/doc/user-guide/indexing.rst
+++ b/doc/user-guide/indexing.rst
@@ -549,6 +549,7 @@ __ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-in
 You can also assign values to all variables of a :py:class:`Dataset` at once:
 
 .. ipython:: python
+    :okwarning:
 
     ds_org = xr.tutorial.open_dataset("eraint_uvz").isel(
         latitude=slice(56, 59), longitude=slice(255, 258), level=0

From f90e7e0fe26ea5a3123b07581811f627be19b271 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Fri, 15 Mar 2024 08:20:58 +0100
Subject: [PATCH 10/15] FIX: silence SerializationWarnings

---
 xarray/tests/test_conventions.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 91a8e368de5..c83b74408c3 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -63,7 +63,25 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
         np.arange(10),
         {"units": "foobar", "missing_value": np.nan, "_FillValue": np.nan},
     )
-    actual = conventions.decode_cf_variable("t", var)
+
+    expected_warnings = {
+        (
+            SerializationWarning,
+            "variable 't' has non-conforming 'missing_value' nan defined, "
+            "dropping 'missing_value' entirely.",
+        ),
+        (
+            SerializationWarning,
+            "variable 't' has non-conforming '_FillValue' nan defined, "
+            "dropping '_FillValue' entirely.",
+        ),
+    }
+
+    with pytest.warns(Warning) as winfo:
+        actual = conventions.decode_cf_variable("t", var)
+    actual_warnings = {(warn.category, warn.message.args[0]) for warn in winfo}
+    assert actual_warnings == expected_warnings
+
     assert_identical(actual, expected)
 
     var = Variable(
@@ -75,7 +93,10 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
             "_FillValue": np.float32(np.nan),
         },
     )
-    actual = conventions.decode_cf_variable("t", var)
+    with pytest.warns(Warning) as winfo:
+        actual = conventions.decode_cf_variable("t", var)
+    actual_warnings = {(warn.category, warn.message.args[0]) for warn in winfo}
+    assert actual_warnings == expected_warnings
     assert_identical(actual, expected)
 
 
@@ -122,6 +143,8 @@ def test_missing_fillvalue(self) -> None:
         with pytest.warns(Warning, match="floating point data as an integer"):
             conventions.encode_cf_variable(v)
 
+        conventions.encode_cf_variable(v)
+
     def test_multidimensional_coordinates(self) -> None:
         # regression test for GH1763
         # Set up test case with coordinates that have overlapping (but not

From da2222e894e8508f2b9b1a0e69ddb24e62a57904 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kai.muehlbauer@uni-bonn.de>
Date: Fri, 15 Mar 2024 08:38:42 +0100
Subject: [PATCH 11/15] FIX: silence mypy by casting to string early

---
 xarray/tests/test_conventions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index c83b74408c3..e4584b4d0c1 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -79,7 +79,7 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
 
     with pytest.warns(Warning) as winfo:
         actual = conventions.decode_cf_variable("t", var)
-    actual_warnings = {(warn.category, warn.message.args[0]) for warn in winfo}
+    actual_warnings = {(warn.category, str(warn.message)) for warn in winfo}
     assert actual_warnings == expected_warnings
 
     assert_identical(actual, expected)
@@ -95,7 +95,7 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
     )
     with pytest.warns(Warning) as winfo:
         actual = conventions.decode_cf_variable("t", var)
-    actual_warnings = {(warn.category, warn.message.args[0]) for warn in winfo}
+    actual_warnings = {(warn.category, str(warn.message)) for warn in winfo}
     assert actual_warnings == expected_warnings
     assert_identical(actual, expected)
 

From f6fe9cb863251bc43d873f66836c21a4b6271c4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Fri, 15 Mar 2024 16:50:32 +0100
Subject: [PATCH 12/15] Update xarray/tests/test_conventions.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 xarray/tests/test_conventions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index e4584b4d0c1..b46824c7321 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -143,7 +143,6 @@ def test_missing_fillvalue(self) -> None:
         with pytest.warns(Warning, match="floating point data as an integer"):
             conventions.encode_cf_variable(v)
 
-        conventions.encode_cf_variable(v)
 
     def test_multidimensional_coordinates(self) -> None:
         # regression test for GH1763

From 6b30298f5d61a5bebc11953c82f8736d408f9fa4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 15 Mar 2024 15:51:03 +0000
Subject: [PATCH 13/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/tests/test_conventions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index b46824c7321..38161563029 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -143,7 +143,6 @@ def test_missing_fillvalue(self) -> None:
         with pytest.warns(Warning, match="floating point data as an integer"):
             conventions.encode_cf_variable(v)
 
-
     def test_multidimensional_coordinates(self) -> None:
         # regression test for GH1763
         # Set up test case with coordinates that have overlapping (but not

From 1d1b1a0ad0f710aef7f4927895f94dab8cb7082d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Fri, 15 Mar 2024 17:06:08 +0100
Subject: [PATCH 14/15] Shorten test, add comment checking for two warnings

---
 xarray/tests/test_conventions.py | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 38161563029..a1293b4da93 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -64,23 +64,12 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
         {"units": "foobar", "missing_value": np.nan, "_FillValue": np.nan},
     )
 
-    expected_warnings = {
-        (
-            SerializationWarning,
-            "variable 't' has non-conforming 'missing_value' nan defined, "
-            "dropping 'missing_value' entirely.",
-        ),
-        (
-            SerializationWarning,
-            "variable 't' has non-conforming '_FillValue' nan defined, "
-            "dropping '_FillValue' entirely.",
-        ),
-    }
-
+    # the following code issues two warnings, so we need to check for both
     with pytest.warns(Warning) as winfo:
         actual = conventions.decode_cf_variable("t", var)
-    actual_warnings = {(warn.category, str(warn.message)) for warn in winfo}
-    assert actual_warnings == expected_warnings
+    for aw in winfo:
+        assert aw.category == SerializationWarning
+        assert "non-conforming" in str(aw.message)
 
     assert_identical(actual, expected)
 
@@ -93,10 +82,13 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
             "_FillValue": np.float32(np.nan),
         },
     )
+
+    # the following code issues two warnings, so we need to check for both
     with pytest.warns(Warning) as winfo:
         actual = conventions.decode_cf_variable("t", var)
-    actual_warnings = {(warn.category, str(warn.message)) for warn in winfo}
-    assert actual_warnings == expected_warnings
+    for aw in winfo:
+        assert aw.category == SerializationWarning
+        assert "non-conforming" in str(aw.message)
     assert_identical(actual, expected)
 
 

From 30985fa0ded7109ca55a1424a509d8c96c459317 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= <kmuehlbauer@wradlib.org>
Date: Fri, 15 Mar 2024 17:11:20 +0100
Subject: [PATCH 15/15] make test even shorter

---
 xarray/tests/test_conventions.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index a1293b4da93..fdfea3c3fe8 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -65,10 +65,9 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
     )
 
     # the following code issues two warnings, so we need to check for both
-    with pytest.warns(Warning) as winfo:
+    with pytest.warns(SerializationWarning) as winfo:
         actual = conventions.decode_cf_variable("t", var)
     for aw in winfo:
-        assert aw.category == SerializationWarning
         assert "non-conforming" in str(aw.message)
 
     assert_identical(actual, expected)
@@ -84,10 +83,9 @@ def test_decode_cf_with_conflicting_fill_missing_value() -> None:
     )
 
     # the following code issues two warnings, so we need to check for both
-    with pytest.warns(Warning) as winfo:
+    with pytest.warns(SerializationWarning) as winfo:
         actual = conventions.decode_cf_variable("t", var)
     for aw in winfo:
-        assert aw.category == SerializationWarning
         assert "non-conforming" in str(aw.message)
     assert_identical(actual, expected)