From a2e9260da208e6ed91d0f1671d76bd83e3736c8d Mon Sep 17 00:00:00 2001 From: Emilio Mayorga Date: Fri, 4 Aug 2023 08:52:14 -0700 Subject: [PATCH] Add new Provenance combination_* attributes with combine_echodata, mirroring conversion_* attributes (#1113) --- echopype/echodata/combine.py | 10 +++++---- echopype/echodata/echodata.py | 22 ++++++++++++++----- .../tests/echodata/test_echodata_combine.py | 14 +++++++++++- echopype/utils/prov.py | 2 +- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/echopype/echodata/combine.py b/echopype/echodata/combine.py index 403c72666..bd08f84fc 100644 --- a/echopype/echodata/combine.py +++ b/echopype/echodata/combine.py @@ -1,4 +1,3 @@ -import datetime import itertools import re from pathlib import Path @@ -13,6 +12,7 @@ from ..utils.io import validate_output_path from ..utils.log import _init_logger +from ..utils.prov import echopype_prov_attrs from .echodata import EchoData logger = _init_logger(__name__) @@ -742,11 +742,13 @@ def _combine( combined_ds.attrs.update( { "is_combined": True, - "conversion_time": datetime.datetime.utcnow().strftime( - "%Y-%m-%dT%H:%M:%SZ" - ), + "conversion_software_name": group_attrs["conversion_software_name"], + "conversion_software_version": group_attrs["conversion_software_version"], + "conversion_time": group_attrs["conversion_time"], } ) + prov_dict = echopype_prov_attrs(process_type="combination") + combined_ds = combined_ds.assign_attrs(prov_dict) # Data holding tree_dict[ed_group] = combined_ds diff --git a/echopype/echodata/echodata.py b/echopype/echodata/echodata.py index 57f9da522..30249b714 100644 --- a/echopype/echodata/echodata.py +++ b/echopype/echodata/echodata.py @@ -3,7 +3,7 @@ import warnings from html import escape from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Tuple, Union import fsspec import numpy as np @@ -196,16 +196,28 @@ def _load_tree(self) -> None: setattr(self, group, node) @property - def version_info(self) -> Tuple[int]: - if self["Provenance"].attrs.get("conversion_software_name", None) == "echopype": - version_str = self["Provenance"].attrs.get("conversion_software_version", None) + def version_info(self) -> Union[Tuple[int], None]: + def _get_version_tuple(provenance_type): + """ + Parameters + ---------- + provenance_type : str + Either conversion or combination + """ + version_str = self["Provenance"].attrs.get(f"{provenance_type}_software_version", None) if version_str is not None: if version_str.startswith("v"): # Removes v in case of v0.4.x or less version_str = version_str.strip("v") version_num = version_str.split(".")[:3] return tuple([int(i) for i in version_num]) - return None + + if self["Provenance"].attrs.get("combination_software_name", None) == "echopype": + return _get_version_tuple("combination") + elif self["Provenance"].attrs.get("conversion_software_name", None) == "echopype": + return _get_version_tuple("conversion") + else: + return None @property def nbytes(self) -> float: diff --git a/echopype/tests/echodata/test_echodata_combine.py b/echopype/tests/echodata/test_echodata_combine.py index 37f0169e3..f1f5b6f33 100644 --- a/echopype/tests/echodata/test_echodata_combine.py +++ b/echopype/tests/echodata/test_echodata_combine.py @@ -1,3 +1,4 @@ +from datetime import datetime from textwrap import dedent from pathlib import Path import tempfile @@ -160,6 +161,18 @@ def test_combine_echodata(raw_datasets): combined = echopype.combine_echodata(eds) + # Test Provenance conversion and combination attributes + for attr_token in ["software_name", "software_version", "time"]: + assert f"conversion_{attr_token}" in combined['Provenance'].attrs + assert f"combination_{attr_token}" in combined['Provenance'].attrs + + def attr_time_to_dt(time_str): + return datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%SZ') + assert ( + attr_time_to_dt(combined['Provenance'].attrs['conversion_time']) <= + attr_time_to_dt(combined['Provenance'].attrs['combination_time']) + ) + # get all possible dimensions that should be dropped # these correspond to the attribute arrays created all_drop_dims = [] @@ -174,7 +187,6 @@ def test_combine_echodata(raw_datasets): all_drop_dims.append("echodata_filename") for group_name in combined.group_paths: - # get all Datasets to be combined combined_group: xr.Dataset = combined[group_name] eds_groups = [ diff --git a/echopype/utils/prov.py b/echopype/utils/prov.py index b46173dd0..935ddfb4b 100644 --- a/echopype/utils/prov.py +++ b/echopype/utils/prov.py @@ -12,7 +12,7 @@ from .log import _init_logger -ProcessType = Literal["conversion", "processing", "mask"] +ProcessType = Literal["conversion", "combination", "processing", "mask"] # Note that this PathHint is defined differently from the one in ..core PathHint = Union[str, Path] PathSequenceHint = Union[List[PathHint], Tuple[PathHint], NDArray[PathHint]]