Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix time encoding to use int64 and "nanoseconds since 1970-01-01 00:00:00Z" #1299

32 changes: 19 additions & 13 deletions echopype/convert/set_groups_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import xarray as xr

from ..echodata.convention import sonarnetcdf_1
from ..utils.coding import COMPRESSION_SETTINGS, set_time_encodings
from ..utils.coding import COMPRESSION_SETTINGS, DEFAULT_TIME_ENCODING, set_time_encodings
from ..utils.prov import echopype_prov_attrs, source_files_vars

NMEA_SENTENCE_DEFAULT = ["GGA", "GLL", "RMC"]
Expand Down Expand Up @@ -128,11 +128,16 @@ def set_nmea(self) -> xr.Dataset:
"""Set the Platform/NMEA group."""
# Save nan if nmea data is not encoded in the raw file
if len(self.parser_obj.nmea["nmea_string"]) != 0:
# Convert np.datetime64 numbers to seconds since 1900-01-01 00:00:00Z
# Convert np.datetime64 numbers to nanoseconds since 1970-01-01 00:00:00Z
# due to xarray.to_netcdf() error on encoding np.datetime64 objects directly
time = (
self.parser_obj.nmea["timestamp"] - np.datetime64("1900-01-01T00:00:00")
) / np.timedelta64(1, "s")
# print(np.array(self.parser_obj.nmea["timestamp"])[idx_loc].shape)
time, _, _ = xr.coding.times.encode_cf_datetime(
self.parser_obj.nmea["timestamp"],
**{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
},
)
raw_nmea = self.parser_obj.nmea["nmea_string"]
else:
time = [np.nan]
Expand Down Expand Up @@ -215,15 +220,16 @@ def _extract_NMEA_latlon(self):
if nmea_msg
else [np.nan]
)
time1 = (
(
np.array(self.parser_obj.nmea["timestamp"])[idx_loc]
- np.datetime64("1900-01-01T00:00:00")
if nmea_msg:
time1, _, _ = xr.coding.times.encode_cf_datetime(
np.array(self.parser_obj.nmea["timestamp"])[idx_loc],
**{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
},
)
/ np.timedelta64(1, "s")
if nmea_msg
else [np.nan]
)
else:
time1 = [np.nan]

return time1, msg_type, lat, lon

Expand Down
67 changes: 66 additions & 1 deletion echopype/tests/utils/test_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import xarray as xr
import math
import dask
import warnings

from echopype.utils.coding import _get_auto_chunk, set_netcdf_encodings
from echopype.utils.coding import _get_auto_chunk, set_netcdf_encodings, _encode_dataarray, DEFAULT_TIME_ENCODING

@pytest.mark.parametrize(
"chunk",
Expand Down Expand Up @@ -69,3 +70,67 @@ def test_set_netcdf_encodings():
assert encoding["var2"]["zlib"] is True
assert encoding["var2"]["complevel"] == 5
assert encoding["var3"]["zlib"] is False

@pytest.mark.unit
def test_encode_dataarray_on_nanosecond_resolution_encoding():
    """Verify nanosecond-resolution datetimes round-trip silently through
    ``_encode_dataarray`` (any warning is escalated to an error)."""
    # A few datetime64[ns] samples from 2023: their offsets from the
    # 1970-01-01 epoch fit comfortably within int64 at nanosecond scale.
    timestamps = np.array(
        [
            '2023-11-22T16:22:41.088137000',
            '2023-11-22T16:22:46.150034000',
            '2023-11-22T16:22:51.140442000',
            '2023-11-22T16:22:56.143124000'
        ],
        dtype='datetime64[ns]'
    )

    # Promote every warning to an error so a lossy encoding cannot pass.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        round_tripped = _encode_dataarray(timestamps)

    # The encode/decode round trip must be lossless.
    assert np.array_equal(timestamps, round_tripped), "Arrays are not equal"

@pytest.mark.unit
def test_encode_dataarray_on_encoded_time_data():
    """Verify that pre-encoded int64 time data decodes back to the original
    datetimes, that empty input passes through, and that float-encoded input
    raises ``ValueError``."""
    original_times = np.array(
        [
            '2023-11-22T16:22:41.088137000',
            '2023-11-22T16:22:46.150034000',
            '2023-11-22T16:22:51.140442000',
            '2023-11-22T16:22:56.143124000'
        ],
        dtype='datetime64[ns]'
    )

    # Encode the datetimes with the default time encoding (int64 offsets).
    encoded_times, _, _ = xr.coding.times.encode_cf_datetime(
        original_times,
        units=DEFAULT_TIME_ENCODING["units"],
        calendar=DEFAULT_TIME_ENCODING["calendar"],
    )

    # Decoding already-encoded int64 data must not emit any warning.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        decoded_times = _encode_dataarray(encoded_times)

    # Decoding must reproduce the original datetimes exactly.
    assert np.array_equal(original_times, decoded_times), "Arrays are not equal"

    # An empty array is returned unchanged.
    assert np.array_equal(np.empty(0), _encode_dataarray(np.empty(0)))

    # Float-typed encoded time data is rejected with a ValueError.
    with pytest.raises(ValueError, match="Encoded time data array must be of type ```np.int64```."):
        _encode_dataarray(encoded_times.astype(np.float64))
30 changes: 17 additions & 13 deletions echopype/utils/coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
from xarray import coding

DEFAULT_TIME_ENCODING = {
"units": "seconds since 1900-01-01T00:00:00+00:00",
"units": "nanoseconds since 1970-01-01T00:00:00Z",
"calendar": "gregorian",
"_FillValue": np.nan,
"dtype": np.dtype("float64"),
"dtype": np.dtype("int64"),
}

COMPRESSION_SETTINGS = {
Expand Down Expand Up @@ -71,24 +70,30 @@ def sanitize_dtypes(ds: xr.Dataset) -> xr.Dataset:
return ds


def _encode_dataarray(da, dtype):
def _encode_dataarray(da):
"""Encodes and decode datetime64 array similar to writing to file"""
if da.size == 0:
return da
read_encoding = {
"units": "seconds since 1900-01-01T00:00:00+00:00",
"calendar": "gregorian",
}

if dtype in [np.float64, np.int64]:
if da.dtype == np.int64:
encoded_data = da
elif da.dtype == np.float64:
raise ValueError("Encoded time data array must be of type ```np.int64```.")
else:
# fmt: off
encoded_data, _, _ = coding.times.encode_cf_datetime(
da, **read_encoding
da, **{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
}
)
# fmt: on
return coding.times.decode_cf_datetime(encoded_data, **read_encoding)
return coding.times.decode_cf_datetime(
encoded_data,
**{
"units": DEFAULT_TIME_ENCODING["units"],
"calendar": DEFAULT_TIME_ENCODING["calendar"],
},
)


def _get_auto_chunk(
Expand Down Expand Up @@ -130,7 +135,6 @@ def set_time_encodings(ds: xr.Dataset) -> xr.Dataset:
_encode_dataarray,
da,
keep_attrs=True,
kwargs={"dtype": da.dtype},
)

new_ds[var].encoding = encoding
Expand Down