Skip to content

Commit

Permalink
Reset order of encoding/decoding, but special-case times in CFMaskCoder
Browse files Browse the repository at this point in the history
  • Loading branch information
kmuehlbauer committed May 8, 2023
1 parent f5db449 commit 23c565c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 32 deletions.
45 changes: 22 additions & 23 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,20 +236,19 @@ def encode(self, variable: Variable, name: T_Name = None):
f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data."
)

# cast to correct dtype in case of times
# see GH 7817
units = attrs.get("units", None)
if isinstance(units, str) and "since" in units:
encoded_dtype = encoding.pop("dtype")
if encoded_dtype is not None and encoded_dtype != data.dtype:
data = np.asarray(data, dtype=encoded_dtype)

if fv_exists:
# Ensure _FillValue is cast to same dtype as data's
encoding["_FillValue"] = dtype.type(fv)
fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
# retrieve _FillValue in case of np.datetime64
# see GH 7817
if np.issubdtype(data.dtype, np.datetime64):
units = encoding.get("units", None)
if isinstance(units, str) and "since" in units:
delta, _ = times._unpack_netcdf_time_units(units)
delta = times._netcdf_to_numpy_timeunit(delta)
fill_value = np.datetime64(fill_value.item(), delta).astype(
"datetime64[ns]"
)

if not pd.isnull(fill_value):
data = duck_array_ops.fillna(data, fill_value)

Expand Down Expand Up @@ -284,19 +283,19 @@ def decode(self, variable: Variable, name: T_Name = None):
SerializationWarning,
stacklevel=3,
)

dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
# retrieve _FillValue in case of np.datetime64
# see GH 7817
if np.issubdtype(data.dtype, np.datetime64) and decoded_fill_value.astype(
np.int64
) == np.datetime64("NaT").astype(np.int64):
delta, _ = times._unpack_netcdf_time_units(encoding["units"])
delta = times._netcdf_to_numpy_timeunit(delta)
encoded_fill_values = {
np.datetime64(encfill.item(), delta).astype("datetime64[ns]")
for encfill in encoded_fill_values
}
units = attrs.get("units", None)
# try to cast data and fill_value to the correct dtypes
# see GH 7817
if ((
isinstance(units, str)
and "since" in units
and np.issubdtype(data.dtype, np.integer)) or np.issubdtype(data.dtype, np.datetime64)
):
dtype, decoded_fill_value = data.dtype, np.datetime64("NaT").astype(
data.dtype
)
else:
dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
if encoded_fill_values:
transform = partial(
_apply_mask,
Expand Down
16 changes: 7 additions & 9 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ def encode_cf_variable(
ensure_not_multiindex(var, name=name)

for coder in [
variables.CFScaleOffsetCoder(),
variables.CFMaskCoder(),
times.CFDatetimeCoder(),
times.CFTimedeltaCoder(),
variables.CFScaleOffsetCoder(),
variables.CFMaskCoder(),
variables.UnsignedIntegerCoder(),
variables.NonStringCoder(),
variables.DefaultFillvalueCoder(),
Expand Down Expand Up @@ -263,13 +263,6 @@ def decode_cf_variable(
var = strings.CharacterArrayCoder().decode(var, name=name)
var = strings.EncodedStringCoder().decode(var)

# time decoding before masking
# GH 7817
if decode_timedelta:
var = times.CFTimedeltaCoder().decode(var, name=name)
if decode_times:
var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)

if mask_and_scale:
for coder in [
variables.UnsignedIntegerCoder(),
Expand All @@ -278,6 +271,11 @@ def decode_cf_variable(
]:
var = coder.decode(var, name=name)

if decode_timedelta:
var = times.CFTimedeltaCoder().decode(var, name=name)
if decode_times:
var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)

if decode_endianness and not var.dtype.isnative:
var = variables.EndianCoder().decode(var)
original_dtype = var.dtype
Expand Down

0 comments on commit 23c565c

Please sign in to comment.