From f5db44933e940c67c5f40ab91e799e232ae0be07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 8 May 2023 14:52:17 +0200 Subject: [PATCH] reverse order of DatetimeCoder and MaskCoder in encoding/decoding, retrieve wanted fill_value in MaskCoder --- xarray/coding/variables.py | 23 ++++++++++++++++++++++- xarray/conventions.py | 16 +++++++++------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5c6e51c2215..0168116cdca 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +from xarray.coding import times from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.pycompat import is_duck_dask_array from xarray.core.variable import Variable @@ -239,6 +240,16 @@ def encode(self, variable: Variable, name: T_Name = None): # Ensure _FillValue is cast to same dtype as data's encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) + # retrieve _FillValue in case of np.datetime64 + # see GH 7817 + if np.issubdtype(data.dtype, np.datetime64): + units = encoding.get("units", None) + if isinstance(units, str) and "since" in units: + delta, _ = times._unpack_netcdf_time_units(units) + delta = times._netcdf_to_numpy_timeunit(delta) + fill_value = np.datetime64(fill_value.item(), delta).astype( + "datetime64[ns]" + ) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) @@ -275,7 +286,17 @@ def decode(self, variable: Variable, name: T_Name = None): ) dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) - + # retrieve _FillValue in case of np.datetime64 + # see GH 7817 + if np.issubdtype(data.dtype, np.datetime64) and decoded_fill_value.astype( + np.int64 + ) == np.datetime64("NaT").astype(np.int64): + delta, _ = times._unpack_netcdf_time_units(encoding["units"]) + delta = times._netcdf_to_numpy_timeunit(delta) + encoded_fill_values = { + np.datetime64(encfill.item(), delta).astype("datetime64[ns]") + for encfill in encoded_fill_values + } if encoded_fill_values: transform = partial( _apply_mask, diff --git a/xarray/conventions.py b/xarray/conventions.py index 1506efc31e8..ca7ceeb717c 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -174,10 +174,10 @@ def encode_cf_variable( ensure_not_multiindex(var, name=name) for coder in [ - times.CFDatetimeCoder(), - times.CFTimedeltaCoder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), + times.CFDatetimeCoder(), + times.CFTimedeltaCoder(), variables.UnsignedIntegerCoder(), variables.NonStringCoder(), variables.DefaultFillvalueCoder(), @@ -263,6 +263,13 @@ def decode_cf_variable( var = strings.CharacterArrayCoder().decode(var, name=name) var = strings.EncodedStringCoder().decode(var) + # time decoding before masking + # GH 7817 + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) + if decode_times: + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) + if mask_and_scale: for coder in [ variables.UnsignedIntegerCoder(), @@ -271,11 +278,6 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) - if decode_timedelta: - var = times.CFTimedeltaCoder().decode(var, name=name) - if decode_times: - var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) - if decode_endianness and not var.dtype.isnative: var = variables.EndianCoder().decode(var) original_dtype = var.dtype