Skip to content

Commit

Permalink
default to float64 as encode dtype
Browse files Browse the repository at this point in the history
  • Loading branch information
ctuguinay committed Apr 11, 2024
1 parent 57ec40d commit 46515f2
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
35 changes: 34 additions & 1 deletion echopype/tests/utils/test_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import xarray as xr
import math
import dask
import warnings

from echopype.utils.coding import _get_auto_chunk, set_netcdf_encodings
from echopype.utils.coding import _get_auto_chunk, set_netcdf_encodings, _encode_dataarray

@pytest.mark.parametrize(
"chunk",
Expand Down Expand Up @@ -69,3 +70,35 @@ def test_set_netcdf_encodings():
assert encoding["var2"]["zlib"] is True
assert encoding["var2"]["complevel"] == 5
assert encoding["var3"]["zlib"] is False

@pytest.mark.unit
def test_encode_dataarray_on_nanosecond_resolution_encoding():
    """Check when `_encode_dataarray` warns for nanosecond-resolution timestamps.

    Two cases are exercised on the same input:

    * ``target_encode_dtype=None`` must emit a ``UserWarning`` whose message
      matches "Times can't be serialized faithfully".
    * ``target_encode_dtype=float64`` must complete without emitting any
      warning (warnings are promoted to errors for the duration of the call).
    """
    # Several datetime64[ns] timestamps with non-zero sub-second components
    timestamps = np.array(
        [
            "2023-11-22T16:22:41.088137000",
            "2023-11-22T16:22:46.150034000",
            "2023-11-22T16:22:51.140442000",
            "2023-11-22T16:22:56.143124000",
        ],
        dtype="datetime64[ns]",
    )
    source_dtype = timestamps.dtype

    # No explicit target encode dtype: a user warning is expected
    with pytest.warns(UserWarning, match=r"Times can't be serialized faithfully.*"):
        _encode_dataarray(timestamps, da_dtype=source_dtype, target_encode_dtype=None)

    # float64 target encode dtype: any warning is turned into an exception,
    # so reaching the end of this block means none was raised.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        _encode_dataarray(
            timestamps,
            da_dtype=source_dtype,
            target_encode_dtype=np.dtype("float64"),
        )
14 changes: 5 additions & 9 deletions echopype/utils/coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,17 @@ def sanitize_dtypes(ds: xr.Dataset) -> xr.Dataset:
return ds


def _encode_dataarray(da, dtype):
def _encode_dataarray(da, da_dtype, target_encode_dtype=np.dtype("float64")):
"""Encodes and decode datetime64 array similar to writing to file"""
if da.size == 0:
return da
read_encoding = {
"units": "seconds since 1900-01-01T00:00:00+00:00",
"calendar": "gregorian",
}

if dtype in [np.float64, np.int64]:
read_encoding = {"units": "seconds since 1900-01-01T00:00:00+00:00", "calendar": "gregorian"}
if da_dtype in [np.float64, np.int64]:
encoded_data = da
else:
# fmt: off
encoded_data, _, _ = coding.times.encode_cf_datetime(
da, **read_encoding
da, dtype=target_encode_dtype, **read_encoding
)
# fmt: on
return coding.times.decode_cf_datetime(encoded_data, **read_encoding)
Expand Down Expand Up @@ -130,7 +126,7 @@ def set_time_encodings(ds: xr.Dataset) -> xr.Dataset:
_encode_dataarray,
da,
keep_attrs=True,
kwargs={"dtype": da.dtype},
kwargs={"da_dtype": da.dtype},
)

new_ds[var].encoding = encoding
Expand Down

0 comments on commit 46515f2

Please sign in to comment.