Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix time encoding to use int64 and "nanoseconds since 1970-01-01 00:00:00Z" #1299

32 changes: 19 additions & 13 deletions echopype/convert/set_groups_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import xarray as xr

from ..echodata.convention import sonarnetcdf_1
from ..utils.coding import COMPRESSION_SETTINGS, set_time_encodings
from ..utils.coding import COMPRESSION_SETTINGS, DEFAULT_TIME_ENCODING, set_time_encodings
from ..utils.prov import echopype_prov_attrs, source_files_vars

NMEA_SENTENCE_DEFAULT = ["GGA", "GLL", "RMC"]
Expand Down Expand Up @@ -128,11 +128,16 @@ def set_nmea(self) -> xr.Dataset:
"""Set the Platform/NMEA group."""
# Save nan if nmea data is not encoded in the raw file
if len(self.parser_obj.nmea["nmea_string"]) != 0:
# Convert np.datetime64 numbers to seconds since 1900-01-01 00:00:00Z
# Convert np.datetime64 numbers to nanoseconds since 1970-01-01 00:00:00Z
# due to xarray.to_netcdf() error on encoding np.datetime64 objects directly
time = (
self.parser_obj.nmea["timestamp"] - np.datetime64("1900-01-01T00:00:00")
) / np.timedelta64(1, "s")
# print(np.array(self.parser_obj.nmea["timestamp"])[idx_loc].shape)
time, _, _ = xr.coding.times.encode_cf_datetime(
self.parser_obj.nmea["timestamp"],
**{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
},
)
raw_nmea = self.parser_obj.nmea["nmea_string"]
else:
time = [np.nan]
Expand Down Expand Up @@ -215,15 +220,16 @@ def _extract_NMEA_latlon(self):
if nmea_msg
else [np.nan]
)
time1 = (
(
np.array(self.parser_obj.nmea["timestamp"])[idx_loc]
- np.datetime64("1900-01-01T00:00:00")
if nmea_msg:
time1, _, _ = xr.coding.times.encode_cf_datetime(
np.array(self.parser_obj.nmea["timestamp"])[idx_loc],
**{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
},
)
/ np.timedelta64(1, "s")
if nmea_msg
else [np.nan]
)
else:
time1 = [np.nan]

return time1, msg_type, lat, lon

Expand Down
67 changes: 66 additions & 1 deletion echopype/tests/utils/test_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import xarray as xr
import math
import dask
import warnings

from echopype.utils.coding import _get_auto_chunk, set_netcdf_encodings
from echopype.utils.coding import _get_auto_chunk, set_netcdf_encodings, _encode_dataarray, DEFAULT_TIME_ENCODING

@pytest.mark.parametrize(
"chunk",
Expand Down Expand Up @@ -69,3 +70,67 @@ def test_set_netcdf_encodings():
assert encoding["var2"]["zlib"] is True
assert encoding["var2"]["complevel"] == 5
assert encoding["var3"]["zlib"] is False

@pytest.mark.unit
def test_encode_dataarray_on_nanosecond_resolution_encoding():
    """Verify nanosecond-resolution datetimes round-trip silently through
    ``_encode_dataarray`` (any warning is escalated to an error)."""
    # A few datetime64[ns] samples from 2023: their offsets from the
    # 1970-01-01 epoch fit comfortably within int64 at nanosecond scale.
    timestamps = np.array(
        [
            '2023-11-22T16:22:41.088137000',
            '2023-11-22T16:22:46.150034000',
            '2023-11-22T16:22:51.140442000',
            '2023-11-22T16:22:56.143124000'
        ],
        dtype='datetime64[ns]'
    )

    # Promote every warning to an error so a lossy encoding cannot pass.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        round_tripped = _encode_dataarray(timestamps)

    # The encode/decode round trip must be lossless.
    assert np.array_equal(timestamps, round_tripped), "Arrays are not equal"

@pytest.mark.unit
def test_encode_dataarray_on_encoded_time_data():
    """Verify that pre-encoded int64 time data decodes back to the original
    datetimes, that empty input passes through, and that float-encoded input
    raises ``ValueError``."""
    original_times = np.array(
        [
            '2023-11-22T16:22:41.088137000',
            '2023-11-22T16:22:46.150034000',
            '2023-11-22T16:22:51.140442000',
            '2023-11-22T16:22:56.143124000'
        ],
        dtype='datetime64[ns]'
    )

    # Encode the datetimes with the default time encoding (int64 offsets).
    encoded_times, _, _ = xr.coding.times.encode_cf_datetime(
        original_times,
        units=DEFAULT_TIME_ENCODING["units"],
        calendar=DEFAULT_TIME_ENCODING["calendar"],
    )

    # Decoding already-encoded int64 data must not emit any warning.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        decoded_times = _encode_dataarray(encoded_times)

    # Decoding must reproduce the original datetimes exactly.
    assert np.array_equal(original_times, decoded_times), "Arrays are not equal"

    # An empty array is returned unchanged.
    assert np.array_equal(np.empty(0), _encode_dataarray(np.empty(0)))

    # Float-typed encoded time data is rejected with a ValueError.
    with pytest.raises(ValueError, match="Encoded time data array must be of type ```np.int64```."):
        _encode_dataarray(encoded_times.astype(np.float64))
30 changes: 17 additions & 13 deletions echopype/utils/coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
from xarray import coding

DEFAULT_TIME_ENCODING = {
"units": "seconds since 1900-01-01T00:00:00+00:00",
"units": "nanoseconds since 1970-01-01T00:00:00Z",
"calendar": "gregorian",
"_FillValue": np.nan,
"dtype": np.dtype("float64"),
"dtype": np.dtype("int64"),
}

COMPRESSION_SETTINGS = {
Expand Down Expand Up @@ -71,24 +70,30 @@ def sanitize_dtypes(ds: xr.Dataset) -> xr.Dataset:
return ds


def _encode_dataarray(da, dtype):
def _encode_dataarray(da):
"""Encodes and decode datetime64 array similar to writing to file"""
if da.size == 0:
return da
read_encoding = {
"units": "seconds since 1900-01-01T00:00:00+00:00",
"calendar": "gregorian",
}

if dtype in [np.float64, np.int64]:
if da.dtype == np.int64:
encoded_data = da
elif da.dtype == np.float64:
raise ValueError("Encoded time data array must be of type ```np.int64```.")
else:
# fmt: off
encoded_data, _, _ = coding.times.encode_cf_datetime(
da, **read_encoding
da, **{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
}
)
# fmt: on
return coding.times.decode_cf_datetime(encoded_data, **read_encoding)
return coding.times.decode_cf_datetime(
encoded_data,
**{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
},
)


def _get_auto_chunk(
Expand Down Expand Up @@ -130,7 +135,6 @@ def set_time_encodings(ds: xr.Dataset) -> xr.Dataset:
_encode_dataarray,
da,
keep_attrs=True,
kwargs={"dtype": da.dtype},
)

new_ds[var].encoding = encoding
Expand Down