diff --git a/docs/source/whats-new.md b/docs/source/whats-new.md index ce80fa3cc..9f8b6d98c 100644 --- a/docs/source/whats-new.md +++ b/docs/source/whats-new.md @@ -4,6 +4,26 @@ What's new See [GitHub releases page](https://github.com/OSOceanAcoustics/echopype/releases) for the complete history. +# v0.8.3 (2024 December 24) + +## Overview + +This release includes a bug fix for changes from the previous release and a few functionality enhancements. + +## Enhancements +- Add parser support for EK80 MRU1 datagram (#1242) +- Add support for `consolidate` subpackage functions to accept both in-memory or stored datasets (#1216) +- Add test for ES60 spare field decoding issue (#1233) +- Add test for EK80 missing `receiver_sampling_freq` error (#1234) + +## Bug fixes +- Fixed reshape bug in `pad_shorter_ping` that was remnant from `use_swap` full refactoring (#1234) + + + + + + # v0.8.2 (2023 November 20) ## Overview diff --git a/echopype/consolidate/api.py b/echopype/consolidate/api.py index 364c76be3..64235e91c 100644 --- a/echopype/consolidate/api.py +++ b/echopype/consolidate/api.py @@ -1,5 +1,6 @@ import datetime import pathlib +from pathlib import Path from typing import Optional, Union import numpy as np @@ -8,14 +9,14 @@ from ..calibrate.ek80_complex import get_filter_coeff from ..echodata import EchoData from ..echodata.simrad import retrieve_correct_beam_group -from ..utils.io import validate_source_ds_da +from ..utils.io import get_file_format, open_source from ..utils.prov import add_processing_level -from .split_beam_angle import add_angle_to_ds, get_angle_complex_samples, get_angle_power_samples +from .split_beam_angle import get_angle_complex_samples, get_angle_power_samples POSITION_VARIABLES = ["latitude", "longitude"] -def swap_dims_channel_frequency(ds: xr.Dataset) -> xr.Dataset: +def swap_dims_channel_frequency(ds: Union[xr.Dataset, str, pathlib.Path]) -> xr.Dataset: """ Use frequency_nominal in place of channel to be dataset dimension and coorindate. @@ -24,8 +25,9 @@ def swap_dims_channel_frequency(ds: xr.Dataset) -> xr.Dataset: Parameters ---------- - ds : xr.Dataset - Dataset for which the dimension will be swapped + ds : xr.Dataset or str or pathlib.Path + Dataset or path to a file containing the Dataset + for which the dimension will be swapped Returns ------- @@ -35,6 +37,7 @@ def swap_dims_channel_frequency(ds: xr.Dataset) -> xr.Dataset: ----- This operation is only possible when there are no duplicated frequencies present in the file. """ + ds = open_source(ds, "dataset", {}) # Only possible if no duplicated frequencies if np.unique(ds["frequency_nominal"]).size == ds["frequency_nominal"].size: return ( @@ -50,7 +53,7 @@ def swap_dims_channel_frequency(ds: xr.Dataset) -> xr.Dataset: def add_depth( - ds: xr.Dataset, + ds: Union[xr.Dataset, str, pathlib.Path], depth_offset: float = 0, tilt: float = 0, downward: bool = True, @@ -64,8 +67,9 @@ def add_depth( Parameters ---------- - ds : xr.Dataset - Source Sv dataset to which a depth variable will be added. + ds : xr.Dataset or str or pathlib.Path + Source Sv dataset or path to a file containing the Source Sv dataset + to which a depth variable will be added. Must contain `echo_range`. 
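Note on the `consolidate/api.py` changes in this hunk: `swap_dims_channel_frequency`, `add_depth`, and `add_location` now route their dataset argument through the new `open_source` helper, so each accepts either an in-memory `xr.Dataset` or a path to a stored zarr/netCDF file. A minimal usage sketch; the raw file and the `Sv_data.zarr` path are hypothetical, for illustration only:

```python
import echopype as ep

# Hypothetical files, for illustration only
ed = ep.open_raw("example.raw", sonar_model="EK60")
ds_Sv = ep.calibrate.compute_Sv(ed)
ds_Sv.to_zarr("Sv_data.zarr")

# In-memory dataset, as before
ds_swapped = ep.consolidate.swap_dims_channel_frequency(ds_Sv)

# Stored dataset: the path is resolved through open_source and opened lazily
ds_swapped = ep.consolidate.swap_dims_channel_frequency("Sv_data.zarr")
ds_depth = ep.consolidate.add_depth(
    "Sv_data.zarr", depth_offset=5.0, tilt=15.0, downward=True
)
```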
depth_offset : float Offset along the vertical (depth) dimension to account for actual transducer @@ -114,6 +118,7 @@ def add_depth( # else: # tilt = 0 + ds = open_source(ds, "dataset", {}) # Multiplication factor depending on if transducers are pointing downward mult = 1 if downward else -1 @@ -132,7 +137,11 @@ def add_depth( @add_processing_level("L2A") -def add_location(ds: xr.Dataset, echodata: EchoData = None, nmea_sentence: Optional[str] = None): +def add_location( + ds: Union[xr.Dataset, str, pathlib.Path], + echodata: Optional[Union[EchoData, str, pathlib.Path]], + nmea_sentence: Optional[str] = None, +): """ Add geographical location (latitude/longitude) to the Sv dataset. @@ -142,10 +151,12 @@ def add_location(ds: xr.Dataset, echodata: EchoData = None, nmea_sentence: Optio Parameters ---------- - ds : xr.Dataset - An Sv or MVBS dataset for which the geographical locations will be added to - echodata - An `EchoData` object holding the raw data + ds : xr.Dataset or str or pathlib.Path + An Sv or MVBS dataset or path to a file containing the Sv or MVBS + dataset for which the geographical locations will be added to + echodata : EchoData or str or pathlib.Path + An ``EchoData`` object or path to a file containing the ``EchoData`` + object holding the raw data nmea_sentence NMEA sentence to select a subset of location data (optional) @@ -174,6 +185,9 @@ def sel_interp(var, time_dim_name): # Values may be nan if there are ping_time values outside the time_dim_name range return position_var.interp(**{time_dim_name: ds["ping_time"]}) + ds = open_source(ds, "dataset", {}) + echodata = open_source(echodata, "echodata", {}) + if "longitude" not in echodata["Platform"] or echodata["Platform"]["longitude"].isnull().all(): raise ValueError("Coordinate variables not present or all nan") @@ -198,12 +212,12 @@ def sel_interp(var, time_dim_name): def add_splitbeam_angle( source_Sv: Union[xr.Dataset, str, pathlib.Path], - echodata: EchoData, + echodata: Union[EchoData, str, pathlib.Path], waveform_mode: str, encode_mode: str, pulse_compression: bool = False, storage_options: dict = {}, - return_dataset: bool = True, + to_disk: bool = True, ) -> xr.Dataset: """ Add split-beam (alongship/athwartship) angles into the Sv dataset. @@ -218,8 +232,9 @@ def add_splitbeam_angle( source_Sv: xr.Dataset or str or pathlib.Path The Sv Dataset or path to a file containing the Sv Dataset, to which the split-beam angles will be added - echodata: EchoData - An ``EchoData`` object holding the raw data + echodata: EchoData or str or pathlib.Path + An ``EchoData`` object or path to a file containing the ``EchoData`` + object holding the raw data waveform_mode : {"CW", "BB"} Type of transmit waveform @@ -240,19 +255,20 @@ def add_splitbeam_angle( storage_options: dict, default={} Any additional parameters for the storage backend, corresponding to the path provided for ``source_Sv`` - return_dataset: bool, default=True - If ``True``, ``source_Sv`` with split-beam angles added will be returned. - ``return_dataset=False`` is useful when ``source_Sv`` is a path and + to_disk: bool, default=True + If ``False``, ``to_disk`` with split-beam angles added will be returned. + ``to_disk=True`` is useful when ``source_Sv`` is a path and users only want to write the split-beam angle data to this path. Returns ------- xr.Dataset or None - If ``return_dataset=False``, nothing will be returned. - If ``return_dataset=True``, either the input dataset ``source_Sv`` + If ``to_disk=False``, nothing will be returned. 
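Note on the renamed `return_dataset` -> `to_disk` parameter: per the guard added further below, `to_disk=True` (the default) requires `source_Sv` to be a path, in which case the angles are written back to that file and a lazy-loaded dataset is returned; with an in-memory Dataset, pass `to_disk=False`. A sketch of the two workflows, with hypothetical file names:

```python
import echopype as ep

# Hypothetical EK80 file used for illustration
ed = ep.open_raw("example_ek80.raw", sonar_model="EK80")
ds_Sv = ep.calibrate.compute_Sv(ed, waveform_mode="CW", encode_mode="complex")
ds_Sv.to_zarr("Sv_data.zarr")

# Path input with to_disk=True (default): angles are appended to the stored
# dataset and a lazy-loaded dataset is returned
ds_Sv_lazy = ep.consolidate.add_splitbeam_angle(
    source_Sv="Sv_data.zarr", echodata=ed,
    waveform_mode="CW", encode_mode="complex",
)

# In-memory input requires to_disk=False, otherwise a ValueError is raised
ds_Sv = ep.consolidate.add_splitbeam_angle(
    source_Sv=ds_Sv, echodata=ed,
    waveform_mode="CW", encode_mode="complex", to_disk=False,
)
```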
+ If ``to_disk=True``, either the input dataset ``source_Sv`` or a lazy-loaded Dataset (from the path ``source_Sv``) with split-beam angles added will be returned. + Raises ------ ValueError @@ -279,6 +295,19 @@ def add_splitbeam_angle( `echodata`` will be identical. If this is not the case, only angle data corresponding to channels existing in ``source_Sv`` will be added. """ + # ensure that when source_Sv is a Dataset then to_disk should be False + if not isinstance(source_Sv, (str, Path)) and to_disk: + raise ValueError( + "The input source_Sv must be a path when to_disk=True, " + "so that the split-beam angles can be written to disk!" + ) + + # obtain the file format of source_Sv if it is a path + if isinstance(source_Sv, (str, Path)): + source_Sv_type = get_file_format(source_Sv) + + source_Sv = open_source(source_Sv, "dataset", storage_options) + echodata = open_source(echodata, "echodata", storage_options) # ensure that echodata was produced by EK60 or EK80-like sensors if echodata.sonar_model not in ["EK60", "ES70", "EK80", "ES80", "EA640"]: @@ -287,22 +316,6 @@ def add_splitbeam_angle( "transducers, split-beam angles cannot be added to source_Sv!" ) - # validate the source_Sv type or path (if it is provided) - source_Sv, file_type = validate_source_ds_da(source_Sv, storage_options) - - # initialize source_Sv_path - source_Sv_path = None - - if isinstance(source_Sv, str): - # store source_Sv path so we can use it to write to later - source_Sv_path = source_Sv - - # TODO: In the future we can improve this by obtaining the variable names, channels, - # and dimension lengths directly from source_Sv using zarr or netcdf4. This would - # prevent the unnecessary loading in of the coordinates, which the below statement does. - # open up Dataset using source_Sv path - source_Sv = xr.open_dataset(source_Sv, engine=file_type, chunks={}, **storage_options) - # raise not implemented error if source_Sv corresponds to MVBS if source_Sv.attrs["processing_function"] == "commongrid.compute_MVBS": raise NotImplementedError("Adding split-beam data to MVBS has not been implemented!") @@ -364,9 +377,18 @@ def add_splitbeam_angle( theta, phi = get_angle_complex_samples(ds_beam, angle_params) # add theta and phi to source_Sv input - source_Sv = add_angle_to_ds( - theta, phi, source_Sv, return_dataset, source_Sv_path, file_type, storage_options - ) + theta.attrs["long_name"] = "split-beam alongship angle" + phi.attrs["long_name"] = "split-beam athwartship angle" + + # add the split-beam angles to the provided Dataset + source_Sv["angle_alongship"] = theta + source_Sv["angle_athwartship"] = phi + if to_disk: + if source_Sv_type == "netcdf4": + source_Sv.to_netcdf(mode="a", **storage_options) + else: + source_Sv.to_zarr(mode="a", **storage_options) + source_Sv = open_source(source_Sv, "dataset", storage_options) # Add history attribute history_attr = ( diff --git a/echopype/consolidate/split_beam_angle.py b/echopype/consolidate/split_beam_angle.py index 4f9333216..aec49b66a 100644 --- a/echopype/consolidate/split_beam_angle.py +++ b/echopype/consolidate/split_beam_angle.py @@ -2,7 +2,7 @@ Contains functions necessary to compute the split-beam (alongship/athwartship) angles and add them to a Dataset. 
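The inlined replacement for `add_angle_to_ds` in the hunk above attaches the computed angles directly to `source_Sv` and, when `to_disk=True`, appends them to the existing store. The write-back relies on xarray's append mode; a minimal standalone sketch with made-up data and a hypothetical path (the fill values are stand-ins, not actual angle computations):

```python
import numpy as np
import xarray as xr

# Minimal stand-in for an Sv dataset already written to disk
ds = xr.Dataset(
    {"Sv": ("ping_time", np.zeros(3))},
    coords={"ping_time": np.arange(3)},
)
ds.to_zarr("Sv_data.zarr", mode="w")

# Stand-ins for the computed theta/phi angles
theta = xr.DataArray(
    np.zeros(3), dims="ping_time",
    attrs={"long_name": "split-beam alongship angle"},
)
phi = xr.DataArray(
    np.zeros(3), dims="ping_time",
    attrs={"long_name": "split-beam athwartship angle"},
)

# Attach to the in-memory dataset, then append the new variables to the store
ds["angle_alongship"] = theta
ds["angle_athwartship"] = phi
ds[["angle_alongship", "angle_athwartship"]].to_zarr("Sv_data.zarr", mode="a")
```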
""" -from typing import List, Optional, Tuple +from typing import List, Tuple import numpy as np import xarray as xr @@ -245,79 +245,3 @@ def get_angle_complex_samples( ) return theta, phi - - -def add_angle_to_ds( - theta: xr.Dataset, - phi: xr.Dataset, - ds: xr.Dataset, - return_dataset: bool, - source_ds_path: Optional[str] = None, - file_type: Optional[str] = None, - storage_options: dict = {}, -) -> Optional[xr.Dataset]: - """ - Adds the split-beam angle data to the provided input ``ds``. - - Parameters - ---------- - theta: xr.Dataset - The calculated split-beam alongship angle - phi: xr.Dataset - The calculated split-beam athwartship angle - ds: xr.Dataset - The Dataset that ``theta`` and ``phi`` will be added to - return_dataset: bool - Whether a dataset will be returned or not - source_ds_path: str, optional - The path to the file corresponding to ``ds``, if it exists - file_type: {"netcdf4", "zarr"}, optional - The file type corresponding to ``source_ds_path`` - storage_options: dict, default={} - Any additional parameters for the storage backend, corresponding to the - path ``source_ds_path`` - - Returns - ------- - xr.Dataset or None - If ``return_dataset=False``, nothing will be returned. If ``return_dataset=True`` - either the input dataset ``ds`` or a lazy-loaded Dataset (obtained from - the path provided by ``source_ds_path``) with the split-beam angle data added - will be returned. - """ - - # TODO: do we want to add anymore attributes to these variables? - # add appropriate attributes to theta and phi - theta.attrs["long_name"] = "split-beam alongship angle" - phi.attrs["long_name"] = "split-beam athwartship angle" - - if source_ds_path is not None: - # put the variables into a Dataset, so they can be written at the same time - # add ds attributes to splitb_ds since they will be overwritten by to_netcdf/zarr - splitb_ds = xr.Dataset( - data_vars={"angle_alongship": theta, "angle_athwartship": phi}, - coords=theta.coords, - attrs=ds.attrs, - ) - - # release any resources linked to ds (necessary for to_netcdf) - ds.close() - - # write the split-beam angle data to the provided path - if file_type == "netcdf4": - splitb_ds.to_netcdf(path=source_ds_path, mode="a", **storage_options) - else: - splitb_ds.to_zarr(store=source_ds_path, mode="a", **storage_options) - - if return_dataset: - # open up and return Dataset in source_ds_path - return xr.open_dataset(source_ds_path, engine=file_type, chunks={}, **storage_options) - - else: - # add the split-beam angles to the provided Dataset - ds["angle_alongship"] = theta - ds["angle_athwartship"] = phi - - if return_dataset: - # return input dataset with split-beam angle data - return ds diff --git a/echopype/convert/parse_base.py b/echopype/convert/parse_base.py index 8bd585503..23a08d52f 100644 --- a/echopype/convert/parse_base.py +++ b/echopype/convert/parse_base.py @@ -600,8 +600,14 @@ def pad_shorter_ping(data_list) -> np.ndarray: lens = np.array([len(item) for item in data_list]) if np.unique(lens).size != 1: # if some pings have different lengths along range if data_list[0].ndim == 2: - # Angle data have an extra dimension for alongship and athwartship samples - mask = lens[:, None, None] > np.array([np.arange(lens.max())] * 2).T + # Data may have an extra dimension: + # - Angle data have an extra dimension for alongship and athwartship samples + # - Complex data have an extra dimension for different transducer sectors + mask = ( + lens[:, None, None] + > np.array([np.arange(lens.max())] * data_list[0].shape[1]).T + ) + else: 
mask = lens[:, None] > np.arange(lens.max()) diff --git a/echopype/convert/set_groups_ek80.py b/echopype/convert/set_groups_ek80.py index 8759b5369..0938ac6b4 100644 --- a/echopype/convert/set_groups_ek80.py +++ b/echopype/convert/set_groups_ek80.py @@ -406,6 +406,17 @@ def set_platform(self) -> xr.Dataset: "standard_name": "sound_frequency", }, ), + "heading": ( + ["time2"], + np.array(self.parser_obj.mru.get("heading", [np.nan])), + { + "long_name": "Platform heading (true)", + "standard_name": "platform_orientation", + "units": "degrees_north", + "valid_min": 0.0, + "valid_max": 360.0, + }, + ), }, coords={ "channel": ( diff --git a/echopype/convert/utils/ek_raw_parsers.py b/echopype/convert/utils/ek_raw_parsers.py index b259e619f..4a7118659 100644 --- a/echopype/convert/utils/ek_raw_parsers.py +++ b/echopype/convert/utils/ek_raw_parsers.py @@ -502,6 +502,16 @@ class SimradMRUParser(_SimradDatagramParser): pitch: float heading: float + type: string == 'MRU1' + low_date: long uint representing LSBytes of 64bit NT date + high_date: long uint representing MSBytes of 64bit NT date + timestamp: datetime.datetime object of NT date, assumed to be UTC + heave: float + roll : float + pitch: float + heading: float + length: long uint + The following methods are defined: from_string(str): parse a raw ER60 NMEA datagram @@ -522,7 +532,17 @@ def __init__(self): ("roll", "f"), ("pitch", "f"), ("heading", "f"), - ] + ], + 1: [ + ("type", "4s"), + ("low_date", "L"), + ("high_date", "L"), + ("heave", "f"), + ("roll", "f"), + ("pitch", "f"), + ("heading", "f"), + ("length", "L"), + ], } _SimradDatagramParser.__init__(self, "MRU", headers) diff --git a/echopype/echodata/echodata.py b/echopype/echodata/echodata.py index 6fd8fc079..1f04a8abc 100644 --- a/echopype/echodata/echodata.py +++ b/echopype/echodata/echodata.py @@ -16,7 +16,6 @@ from ..echodata.utils_platform import _clip_by_time_dim, get_mappings_expanded from ..utils.coding import sanitize_dtypes, set_time_encodings -from ..utils.io import check_file_existence, delete_zarr_store, sanitize_file_path from ..utils.log import _init_logger from ..utils.prov import add_processing_level from .convention import sonarnetcdf_1 @@ -93,6 +92,8 @@ def cleanup_swap_files(self): v.store for k, v in dask_graph.items() if "original-from-zarr" in k ] fs = zarr_stores[0].fs + from ..utils.io import delete_zarr_store + for store in zarr_stores: delete_zarr_store(store, fs) @@ -486,11 +487,15 @@ def _load_file(self, raw_path: "PathHint"): def _check_path(self, filepath: "PathHint"): """Check if converted_raw_path exists""" + from ..utils.io import check_file_existence + file_exists = check_file_existence(filepath, self.storage_options) if not file_exists: raise FileNotFoundError(f"There is no file named {filepath}") def _sanitize_path(self, filepath: "PathHint") -> "PathHint": + from ..utils.io import sanitize_file_path + filepath = sanitize_file_path(filepath, self.storage_options) return filepath diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 8440603d2..0618c8836 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -6,7 +6,7 @@ import numpy as np import xarray as xr -from ..utils.io import validate_source_ds_da +from ..utils.io import validate_source from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level from .freq_diff import _check_freq_diff_source_Sv, _parse_freq_diff_eq @@ -25,7 +25,7 @@ def _validate_source_ds(source_ds, storage_options_ds): Validate the input ``source_ds`` and the associated 
``storage_options_mask``. """ # Validate the source_ds type or path (if it is provided) - source_ds, file_type = validate_source_ds_da(source_ds, storage_options_ds) + source_ds, file_type = validate_source(source_ds, storage_options_ds) if isinstance(source_ds, str): # open up Dataset using source_ds path @@ -90,9 +90,7 @@ def _validate_and_collect_mask_input( for mask_ind in range(len(mask)): # validate the mask type or path (if it is provided) - mask_val, file_type = validate_source_ds_da( - mask[mask_ind], storage_options_mask[mask_ind] - ) + mask_val, file_type = validate_source(mask[mask_ind], storage_options_mask[mask_ind]) # replace mask element path with its corresponding DataArray if isinstance(mask_val, (str, pathlib.Path)): @@ -118,7 +116,7 @@ def _validate_and_collect_mask_input( ) # validate the mask type or path (if it is provided) - mask, file_type = validate_source_ds_da(mask, storage_options_mask) + mask, file_type = validate_source(mask, storage_options_mask) if isinstance(mask, (str, pathlib.Path)): # open up DataArray using mask path @@ -469,7 +467,7 @@ def frequency_differencing( freqAB, chanAB, operator, diff = _parse_freq_diff_eq(freqABEq, chanABEq) # validate the source_Sv type or path (if it is provided) - source_Sv, file_type = validate_source_ds_da(source_Sv, storage_options) + source_Sv, file_type = validate_source(source_Sv, storage_options) if isinstance(source_Sv, str): # open up Dataset using source_Sv path diff --git a/echopype/tests/calibrate/test_calibrate_ek80.py b/echopype/tests/calibrate/test_calibrate_ek80.py index 8e544b5f5..36fe4ca3a 100644 --- a/echopype/tests/calibrate/test_calibrate_ek80.py +++ b/echopype/tests/calibrate/test_calibrate_ek80.py @@ -251,4 +251,21 @@ def test_ek80_BB_power_echoview(ek80_path): ev_vals = df_real.values[:, :] ep_vals = pc_mean.values.real[:, :] assert np.allclose(ev_vals[:, 69:8284], ep_vals[:, 69:], atol=1e-4) - assert np.allclose(ev_vals[:, 90:8284], ep_vals[:, 90:], atol=1e-5) \ No newline at end of file + assert np.allclose(ev_vals[:, 90:8284], ep_vals[:, 90:], atol=1e-5) + + +def test_ek80_CW_complex_Sv_receiver_sampling_freq(ek80_path): + ek80_raw_path = str(ek80_path.joinpath("D20230804-T083032.raw")) + ed = ep.open_raw(ek80_raw_path, sonar_model="EK80") + # Parsed receiver_sampling_frequency is 0 + assert ed["Vendor_specific"]["receiver_sampling_frequency"] == 0 + # Calibration object + waveform_mode = "CW" + encode_mode = "complex" + ds_Sv = ep.calibrate.compute_Sv( + ed, waveform_mode=waveform_mode, encode_mode=encode_mode + ) + + # receiver_sampling_frequency substituted with default value in compute_Sv + assert ds_Sv["receiver_sampling_frequency"] is not None + assert np.allclose(ds_Sv["receiver_sampling_frequency"].data, 1500000) diff --git a/echopype/tests/conftest.py b/echopype/tests/conftest.py index 128fa02cc..006da6f0c 100644 --- a/echopype/tests/conftest.py +++ b/echopype/tests/conftest.py @@ -20,6 +20,7 @@ def test_path(): 'EK60': TEST_DATA_FOLDER / "ek60", 'EK80': TEST_DATA_FOLDER / "ek80", 'EK80_NEW': TEST_DATA_FOLDER / "ek80_new", + 'ES60': TEST_DATA_FOLDER / "es60", 'ES70': TEST_DATA_FOLDER / "es70", 'ES80': TEST_DATA_FOLDER / "es80", 'AZFP': TEST_DATA_FOLDER / "azfp", diff --git a/echopype/tests/consolidate/test_consolidate_integration.py b/echopype/tests/consolidate/test_consolidate_integration.py index bb28815d8..c32fd9bf8 100644 --- a/echopype/tests/consolidate/test_consolidate_integration.py +++ b/echopype/tests/consolidate/test_consolidate_integration.py @@ -1,5 +1,6 @@ import math 
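Returning to the `pad_shorter_ping` fix in `echopype/convert/parse_base.py` above: the padding mask is now built from `data_list[0].shape[1]` rather than a hard-coded 2, so complex data with more than two transducer sectors broadcast correctly. A standalone numpy sketch of what the corrected mask does; the toy data and the fill step are illustrative, not the actual parser internals:

```python
import numpy as np

# Two pings with different sample counts, each sample carrying 4 transducer
# sectors (e.g. EK80 complex data); the old mask hard-coded 2, which only
# matched angle data (alongship/athwartship).
data_list = [np.arange(5 * 4).reshape(5, 4), np.arange(3 * 4).reshape(3, 4)]

lens = np.array([len(item) for item in data_list])   # [5, 3]
n_extra = data_list[0].shape[1]                      # 4 (was hard-coded as 2)
mask = lens[:, None, None] > np.array([np.arange(lens.max())] * n_extra).T

# mask has shape (n_pings, max_len, n_extra); shorter pings are padded with NaN
padded = np.full((len(data_list), lens.max(), n_extra), np.nan)
padded[mask] = np.concatenate(data_list).reshape(-1)
```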
import os +import dask import pathlib import tempfile @@ -316,7 +317,7 @@ def _tests(ds_test, location_type, nmea_sentence=None): @pytest.mark.parametrize( ("sonar_model", "test_path_key", "raw_file_name", "paths_to_echoview_mat", - "waveform_mode", "encode_mode", "pulse_compression", "write_Sv_to_file"), + "waveform_mode", "encode_mode", "pulse_compression", "to_disk"), [ # ek60_CW_power ( @@ -382,7 +383,7 @@ def _tests(ds_test, location_type, nmea_sentence=None): ) def test_add_splitbeam_angle(sonar_model, test_path_key, raw_file_name, test_path, paths_to_echoview_mat, waveform_mode, encode_mode, - pulse_compression, write_Sv_to_file): + pulse_compression, to_disk): # obtain the EchoData object with the data needed for the calculation ed = ep.open_raw(test_path[test_path_key] / raw_file_name, sonar_model=sonar_model) @@ -394,7 +395,7 @@ def test_add_splitbeam_angle(sonar_model, test_path_key, raw_file_name, test_pat temp_dir = None # allows us to test for the case when source_Sv is a path - if write_Sv_to_file: + if to_disk: # create temporary directory for mask_file temp_dir = tempfile.TemporaryDirectory() @@ -410,7 +411,12 @@ def test_add_splitbeam_angle(sonar_model, test_path_key, raw_file_name, test_pat ds_Sv = ep.consolidate.add_splitbeam_angle(source_Sv=ds_Sv, echodata=ed, waveform_mode=waveform_mode, encode_mode=encode_mode, - pulse_compression=pulse_compression) + pulse_compression=pulse_compression, + to_disk=to_disk) + + if to_disk: + assert isinstance(ds_Sv["angle_alongship"].data, dask.array.core.Array) + assert isinstance(ds_Sv["angle_athwartship"].data, dask.array.core.Array) # obtain corresponding echoview output full_echoview_path = [test_path[test_path_key] / path for path in paths_to_echoview_mat] @@ -458,7 +464,8 @@ def test_add_splitbeam_angle_BB_pc(test_path): # add the split-beam angles to Sv dataset ds_Sv = ep.consolidate.add_splitbeam_angle( source_Sv=ds_Sv, echodata=ed, - waveform_mode="BB", encode_mode="complex", pulse_compression=True + waveform_mode="BB", encode_mode="complex", pulse_compression=True, + to_disk=False ) # Load pyecholab pickle diff --git a/echopype/tests/convert/test_convert_ek60.py b/echopype/tests/convert/test_convert_ek60.py index 4a414c566..b6e6a6d8d 100644 --- a/echopype/tests/convert/test_convert_ek60.py +++ b/echopype/tests/convert/test_convert_ek60.py @@ -9,6 +9,10 @@ def ek60_path(test_path): return test_path["EK60"] +@pytest.fixture +def es60_path(test_path): + return test_path["ES60"] + # raw_paths = ['./echopype/test_data/ek60/set1/' + file # for file in os.listdir('./echopype/test_data/ek60/set1')] # 2 range lengths @@ -191,3 +195,16 @@ def test_convert_ek60_splitbeam_no_angle(ek60_path): assert "angle_athwartship" not in ed["Sonar/Beam_group1"] assert "angle_alongship" not in ed["Sonar/Beam_group1"] + + +def test_convert_es60_no_unicode_error(es60_path): + """Convert a file should not give unicode error""" + + raw_path = ( + es60_path + / "L0007-D20191202-T060239-ES60.raw" + ) + try: + open_raw(raw_path, sonar_model='EK60') + except UnicodeDecodeError: + pytest.fail("UnicodeDecodeError raised") diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index 14b57b5fb..641c16ea0 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -5,6 +5,7 @@ from echopype import open_raw from echopype.testing import TEST_DATA_FOLDER +from echopype.convert.parse_ek80 import ParseEK80 from echopype.convert.set_groups_ek80 import WIDE_BAND_TRANS, 
PULSE_COMPRESS, FILTER_IMAG, FILTER_REAL, DECIMATION @@ -428,3 +429,16 @@ def test_convert_ek80_no_fil_coeff(ek80_path): for t in [WIDE_BAND_TRANS, PULSE_COMPRESS]: for p in [FILTER_REAL, FILTER_IMAG, DECIMATION]: assert f"{t}_{p}" not in vendor_spec_ds + + +def test_convert_ek80_mru1(ek80_path): + """Make sure we can convert EK80 file with MRU1 datagram.""" + ek80_mru1_path = str(ek80_path.joinpath('20231016_Cal_-D20231016-T220322.raw')) + echodata = open_raw(raw_file=ek80_mru1_path, sonar_model='EK80') + parser = ParseEK80(str(ek80_mru1_path), None) + parser.parse_raw() + + np.all(echodata["Platform"]["pitch"].data == np.array(parser.mru["pitch"])) + np.all(echodata["Platform"]["roll"].data == np.array(parser.mru["roll"])) + np.all(echodata["Platform"]["vertical_offset"].data == np.array(parser.mru["heave"])) + np.all(echodata["Platform"]["heading"].data == np.array(parser.mru["heading"])) diff --git a/echopype/tests/utils/test_utils_io.py b/echopype/tests/utils/test_utils_io.py index 0fa3db6ea..edf315583 100644 --- a/echopype/tests/utils/test_utils_io.py +++ b/echopype/tests/utils/test_utils_io.py @@ -11,7 +11,7 @@ sanitize_file_path, validate_output_path, env_indep_joinpath, - validate_source_ds_da, + validate_source, init_ep_dir ) import echopype.utils.io @@ -289,12 +289,12 @@ def test_env_indep_joinpath_os_dependent(save_path: str, is_windows: bool, is_cl ) def test_validate_source_ds_da(source_ds_da_input, storage_options_input, true_file_type): """ - Tests that ``validate_source_ds_da`` has the appropriate outputs. + Tests that ``validate_source`` has the appropriate outputs. An exhaustive list of combinations of ``source_ds_da`` and ``storage_options`` are tested in ``test_validate_output_path`` and are therefore not included here. """ - source_ds_output, file_type_output = validate_source_ds_da(source_ds_da_input, storage_options_input) + source_ds_output, file_type_output = validate_source(source_ds_da_input, storage_options_input) if isinstance(source_ds_da_input, (xr.Dataset, xr.DataArray)): assert source_ds_output.identical(source_ds_da_input) diff --git a/echopype/utils/io.py b/echopype/utils/io.py index b28a219ae..473a1e4f1 100644 --- a/echopype/utils/io.py +++ b/echopype/utils/io.py @@ -17,6 +17,8 @@ from fsspec.implementations.local import LocalFileSystem from zarr.storage import FSStore +from ..echodata import EchoData +from ..echodata.api import open_converted from ..utils.coding import set_storage_encodings from ..utils.log import _init_logger @@ -380,29 +382,30 @@ def env_indep_joinpath(*args: Tuple[str, ...]) -> str: return joined_path -def validate_source_ds_da( - source_ds_da: Union[xr.Dataset, xr.DataArray, str, Path], storage_options: Optional[dict] -) -> Tuple[Union[xr.Dataset, str, xr.DataArray], Optional[str]]: +def validate_source( + source: Union[xr.Dataset, xr.DataArray, EchoData, str, Path], + storage_options: Optional[dict], +) -> Tuple[Union[xr.Dataset, str, xr.DataArray, EchoData], Optional[str]]: """ - This function ensures that ``source_ds_da`` is of the correct - type and validates the path of ``source_ds_da``, if it is provided. + This function ensures that ``source`` is of the correct + type and validates the path of ``source``, if it is provided. Parameters ---------- - source_ds_da: xr.Dataset, xr.DataArray, str or pathlib.Path - A source that points to a Dataset or DataArray. If the input is a path, + source: xr.Dataset, xr.DataArray, EchoData, str or pathlib.Path + A source that points to a Dataset or DataArray or EchoData. 
If the input is a path, it specifies the path to a zarr or netcdf file. storage_options: dict, optional Any additional parameters for the storage backend, corresponding to the - path provided for ``source_ds_da`` + path provided for ``source`` Returns ------- - source_ds_da: xr.Dataset or xr.DataArray or str - A Dataset or DataArray which will be the same as the input ``source_ds_da`` or - a validated path to a zarr or netcdf file + source: xr.Dataset or xr.DataArray or EchoData or str + A Dataset or DataArray or EchoData which will be the same as the input ``source`` + or a validated path to a zarr or netcdf file file_type: {"netcdf4", "zarr"}, optional - The file type of the input path if ``source_ds_da`` is a path, otherwise ``None`` + The file type of the input path if ``source`` is a path, otherwise ``None`` """ # initialize file_type @@ -412,26 +415,42 @@ def validate_source_ds_da( if not isinstance(storage_options, dict): raise TypeError("storage_options must be a dict!") - # check that source_ds_da is of the correct type, if it is a path validate + # check that source is of the correct type, if it is a path validate # the path and open the Dataset or DataArray using xarray - if not isinstance(source_ds_da, (xr.Dataset, xr.DataArray, str, Path)): - raise TypeError("source_ds_da must be a Dataset or DataArray or str or pathlib.Path!") - elif isinstance(source_ds_da, (str, Path)): + if not isinstance(source, (xr.Dataset, xr.DataArray, EchoData, str, Path)): + raise TypeError("source must be a Dataset or DataArray or EchoData or str or pathlib.Path!") + elif isinstance(source, (str, Path)): # determine if we obtained a zarr or netcdf file - file_type = get_file_format(source_ds_da) + file_type = get_file_format(source) - # validate source_ds_da if it is a path - source_ds_da = validate_output_path( + # validate source if it is a path + source = validate_output_path( source_file="blank", # will be unused since source_ds cannot be none engine=file_type, output_storage_options=storage_options, - save_path=source_ds_da, + save_path=source, ) # check that the path exists - check_file_existence(file_path=source_ds_da, storage_options=storage_options) + check_file_existence(file_path=source, storage_options=storage_options) - return source_ds_da, file_type + return source, file_type + + +def open_source( + source: Optional[Union[xr.Dataset, xr.DataArray, EchoData, str, Path]], + source_type: str, + storage_options: Optional[dict], +) -> Tuple[Union[xr.Dataset, xr.DataArray, EchoData]]: + source, file_type = validate_source(source, storage_options) + + if isinstance(source, str): + if source_type == "dataset": + source = xr.open_dataset(source, engine=file_type, chunks={}, **storage_options) + if source_type == "echodata": + source = open_converted(source, engine=file_type, **storage_options) + + return source # Utilities for creating temporary swap zarr files -------------------------------------
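The new `open_source` helper above is what lets the `consolidate` functions accept either in-memory objects or stored files: it validates the input via `validate_source` and, for path inputs, opens a lazy `xr.Dataset` or an `EchoData` object depending on `source_type`. A usage sketch with hypothetical local files:

```python
from echopype.utils.io import open_source, validate_source

# Hypothetical stored outputs; storage_options={} for local files
ds_Sv = open_source("Sv_data.zarr", "dataset", {})   # lazily opened xr.Dataset
ed = open_source("converted.zarr", "echodata", {})   # EchoData via open_converted

# Non-path inputs pass straight through unchanged
same_ds = open_source(ds_Sv, "dataset", {})

# validate_source also reports the file format for path inputs
source, file_type = validate_source("Sv_data.nc", {})  # file_type == "netcdf4"
```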