Skip to content

Commit

Permalink
Implementation of data-processing-level attributes (#1001)
Browse files Browse the repository at this point in the history
* Draft, rough, partial implementation of data-processing-level attribute insertion

* Generalized add_processing_level to serve as decorator for class methods. Decorated echodata.update_platform

* Finalize processing level decorator function, handle new wildcard-based level or sublevel propagation forms like L*A or L2*; other cleanups to the function

* Add processing level decorator to remove_noise, apply_mask and compute_MVBS_index_binning

* Added error checks and warnings (echopype logger.info) to add_processing_level

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Added integration test for processing levels functionality, testing multiple processing steps

* Update echopype/tests/utils/test_processinglevels_integration.py

* Rename sv_ds to Sv_ds in test, for clarity and accuracy

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Wu-Jung Lee <[email protected]>
  • Loading branch information
3 people authored Mar 24, 2023
1 parent cb44dbc commit e9a5d86
Show file tree
Hide file tree
Showing 8 changed files with 310 additions and 4 deletions.
7 changes: 6 additions & 1 deletion echopype/clean/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Functions for reducing variabilities in backscatter data.
"""

from ..utils.prov import echopype_prov_attrs
from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level
from .noise_est import NoiseEst


Expand Down Expand Up @@ -32,6 +32,7 @@ def estimate_noise(ds_Sv, ping_num, range_sample_num, noise_max=None):
return noise_obj.Sv_noise


@add_processing_level("L*B")
def remove_noise(ds_Sv, ping_num, range_sample_num, noise_max=None, SNR_threshold=3):
"""
Remove noise by using estimates of background noise
Expand Down Expand Up @@ -68,4 +69,8 @@ def remove_noise(ds_Sv, ping_num, range_sample_num, noise_max=None, SNR_threshol
prov_dict["processing_function"] = "clean.remove_noise"
ds_Sv = ds_Sv.assign_attrs(prov_dict)

# The output ds_Sv is built as a copy of the input ds_Sv, so the step below is
# not needed, strictly speaking. But doing so makes the decorator function more generic
ds_Sv = insert_input_processing_level(ds_Sv, input_ds=ds_Sv)

return ds_Sv
8 changes: 7 additions & 1 deletion echopype/commongrid/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
import xarray as xr

from ..utils.prov import echopype_prov_attrs
from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level
from .mvbs import get_MVBS_along_channels
from .nasc import (
check_identical_depth,
Expand Down Expand Up @@ -70,6 +70,7 @@ def _set_MVBS_attrs(ds):
)


@add_processing_level("L3*")
def compute_MVBS(ds_Sv, range_meter_bin=20, ping_time_bin="20S"):
"""
Compute Mean Volume Backscattering Strength (MVBS)
Expand Down Expand Up @@ -174,9 +175,12 @@ def compute_MVBS(ds_Sv, range_meter_bin=20, ping_time_bin="20S"):
ds_MVBS = ds_MVBS.assign_attrs(prov_dict)
ds_MVBS["frequency_nominal"] = ds_Sv["frequency_nominal"] # re-attach frequency_nominal

ds_MVBS = insert_input_processing_level(ds_MVBS, input_ds=ds_Sv)

return ds_MVBS


@add_processing_level("L3*")
def compute_MVBS_index_binning(ds_Sv, range_sample_num=100, ping_num=100):
"""
Compute Mean Volume Backscattering Strength (MVBS)
Expand Down Expand Up @@ -246,6 +250,8 @@ def compute_MVBS_index_binning(ds_Sv, range_sample_num=100, ping_num=100):
ds_MVBS = ds_MVBS.assign_attrs(prov_dict)
ds_MVBS["frequency_nominal"] = ds_Sv["frequency_nominal"] # re-attach frequency_nominal

ds_MVBS = insert_input_processing_level(ds_MVBS, input_ds=ds_Sv)

return ds_MVBS


Expand Down
2 changes: 2 additions & 0 deletions echopype/consolidate/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ..echodata import EchoData
from ..echodata.simrad import retrieve_correct_beam_group
from ..utils.io import validate_source_ds_da
from ..utils.prov import add_processing_level
from .split_beam_angle import add_angle_to_ds, get_angle_complex_samples, get_angle_power_samples


Expand Down Expand Up @@ -128,6 +129,7 @@ def add_depth(
return ds


@add_processing_level("L2A")
def add_location(ds: xr.Dataset, echodata: EchoData = None, nmea_sentence: Optional[str] = None):
"""
Add geographical location (latitude/longitude) to the Sv dataset.
Expand Down
2 changes: 2 additions & 0 deletions echopype/convert/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ..utils import io
from ..utils.coding import COMPRESSION_SETTINGS
from ..utils.log import _init_logger
from ..utils.prov import add_processing_level

BEAM_SUBGROUP_DEFAULT = "Beam_group1"

Expand Down Expand Up @@ -306,6 +307,7 @@ def _check_file(
return str(raw_file), str(xml)


@add_processing_level("L1A", is_echodata=True)
def open_raw(
raw_file: "PathHint",
sonar_model: "SonarModelsHint",
Expand Down
2 changes: 2 additions & 0 deletions echopype/echodata/echodata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ..utils.coding import set_time_encodings
from ..utils.io import check_file_existence, sanitize_file_path
from ..utils.log import _init_logger
from ..utils.prov import add_processing_level
from .convention import sonarnetcdf_1
from .sensor_ep_version_mapping import ep_version_mapper
from .widgets.utils import tree_repr
Expand Down Expand Up @@ -260,6 +261,7 @@ def __setattr__(self, __name: str, __value: Any) -> None:
self._tree[group_path].ds = __value
super().__setattr__(__name, attr_value)

@add_processing_level("L1A")
def update_platform(
self,
extra_platform_data: xr.Dataset,
Expand Down
5 changes: 4 additions & 1 deletion echopype/mask/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import xarray as xr

from ..utils.io import validate_source_ds_da
from ..utils.prov import echopype_prov_attrs
from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level

# lookup table with key string operator and value as corresponding Python operator
str2ops = {
Expand Down Expand Up @@ -196,6 +196,7 @@ def _variable_prov_attrs(
return attrs


@add_processing_level("L3*")
def apply_mask(
source_ds: Union[xr.Dataset, str, pathlib.Path],
mask: Union[xr.DataArray, str, pathlib.Path, List[Union[xr.DataArray, str, pathlib.Path]]],
Expand Down Expand Up @@ -292,6 +293,8 @@ def apply_mask(

output_ds = output_ds.assign_attrs(prov_dict)

output_ds = insert_input_processing_level(output_ds, input_ds=source_ds)

return output_ds


Expand Down
122 changes: 122 additions & 0 deletions echopype/tests/utils/test_processinglevels_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pytest

import numpy as np
import xarray as xr
import echopype as ep


@pytest.mark.parametrize(
    ["sonar_model", "path_model", "raw_and_xml_paths", "extras"],
    [
        (
            "EK60",
            "EK60",
            ("Winter2017-D20170115-T150122.raw", None),
            {},
        ),
        (
            "AZFP",
            "AZFP",
            ("17082117.01A", "17041823.XML"),
            {"longitude": -60.0, "latitude": 45.0, "salinity": 27.9, "pressure": 59},
        ),
    ],
)
def test_raw_to_mvbs(
    sonar_model,
    path_model,
    raw_and_xml_paths,
    extras,
    test_path
):
    """Integration test of processing-level attribute propagation.

    Runs a raw file through the pipeline
    ``open_raw -> (update_platform) -> compute_Sv -> add_location ->
    remove_noise -> frequency_differencing/apply_mask -> compute_MVBS``
    and, after each step, asserts whether the ``processing_level`` and
    ``processing_level_url`` dataset attributes are present (with the
    expected level string) or absent.

    Parameters
    ----------
    sonar_model : str
        Sonar model name passed to ``ep.open_raw``.
    path_model : str
        Key into the ``test_path`` fixture locating the test-data directory.
    raw_and_xml_paths : tuple
        ``(raw_file_name, xml_file_name_or_None)``; the XML file is only
        needed for AZFP data.
    extras : dict
        Model-specific extra inputs (for AZFP: longitude/latitude to add
        via ``update_platform`` and salinity/pressure for calibration).
    test_path : dict-like
        Project pytest fixture mapping model name to a data directory.
    """
    # Prepare the Sv dataset
    raw_path = test_path[path_model] / raw_and_xml_paths[0]
    if raw_and_xml_paths[1]:
        xml_path = test_path[path_model] / raw_and_xml_paths[1]
    else:
        xml_path = None

    def _presence_test(test_ds, processing_level):
        # Assert that both processing-level attrs exist and the level matches.
        assert "processing_level" in test_ds.attrs
        assert "processing_level_url" in test_ds.attrs
        assert test_ds.attrs["processing_level"] == processing_level

    def _absence_test(test_ds):
        # Assert that neither processing-level attr was assigned.
        assert "processing_level" not in test_ds.attrs
        assert "processing_level_url" not in test_ds.attrs

    # ---- Convert raw file and update_platform
    ed = ep.open_raw(raw_path, xml_path=xml_path, sonar_model=sonar_model)
    if "longitude" in ed['Platform'].data_vars and "latitude" in ed['Platform'].data_vars:
        # lat/lon already in the raw file (EK60 case): Level 1A is set by open_raw
        _presence_test(ed["Top-level"], "Level 1A")
    elif "longitude" in extras and "latitude" in extras:
        # lat/lon must be supplied externally (AZFP case): no level until
        # update_platform adds the position data
        _absence_test(ed["Top-level"])
        point_ds = xr.Dataset(
            {
                "latitude": (["time"], np.array([float(extras["latitude"])])),
                "longitude": (["time"], np.array([float(extras["longitude"])])),
            },
            coords={
                "time": (["time"], np.array([ed["Sonar/Beam_group1"]["ping_time"].values.min()]))
            },
        )
        ed.update_platform(point_ds)
        _presence_test(ed["Top-level"], "Level 1A")
    else:
        _absence_test(ed["Top-level"])
        raise RuntimeError(
            "Platform latitude and longitude are not present and cannot be added "
            "using update_platform based on test raw file and included parameters."
        )

    # ---- Calibrate and add_latlon
    env_params = None
    if sonar_model == "AZFP":
        # AZFP data require external salinity and pressure
        env_params = {
            "temperature": ed["Environment"]["temperature"].values.mean(),
            "salinity": extras["salinity"],
            "pressure": extras["pressure"],
        }

    # compute_Sv alone does not assign a processing level
    ds = ep.calibrate.compute_Sv(echodata=ed, env_params=env_params)
    _absence_test(ds)

    # add_location attaches lat/lon and promotes the dataset to Level 2A
    Sv_ds = ep.consolidate.add_location(ds=ds, echodata=ed)
    assert "longitude" in Sv_ds.data_vars and "latitude" in Sv_ds.data_vars
    _presence_test(Sv_ds, "Level 2A")

    # ---- Noise removal
    # remove_noise bumps the sublevel (L*B): 2A -> 2B
    denoised_ds = ep.clean.remove_noise(Sv_ds, ping_num=10, range_sample_num=20)
    _presence_test(denoised_ds, "Level 2B")

    # ---- apply_mask based on frequency differencing
    def _freqdiff_applymask(test_ds):
        # frequency_differencing expects a dataarray variable named "Sv". For denoised Sv,
        # rename Sv to Sv_raw and Sv_corrected to Sv before passing ds to frequency_differencing
        if "Sv_corrected" in test_ds.data_vars:
            out_ds = test_ds.rename_vars(name_dict={"Sv": "Sv_raw", "Sv_corrected": "Sv"})
        else:
            out_ds = test_ds
        freqAB = list(out_ds.frequency_nominal.values[:2])
        freqdiff_da = ep.mask.frequency_differencing(source_Sv=out_ds, freqAB=freqAB, operator=">", diff=5)

        # Create a new, single-channel Sv variable in ds to pass to apply_mask.
        # The resulting masked Sv will be single-channel.
        out_ds["Sv_ch0"] = out_ds["Sv"].isel(channel=0).squeeze()
        return ep.mask.apply_mask(source_ds=out_ds, var_name="Sv_ch0", mask=freqdiff_da)

    # On Sv w/o noise removal: apply_mask bumps the major level (L3*), keeping sublevel A
    ds = _freqdiff_applymask(Sv_ds)
    _presence_test(ds, "Level 3A")

    # On denoised Sv: sublevel B is carried through -> Level 3B
    ds = _freqdiff_applymask(denoised_ds)
    _presence_test(ds, "Level 3B")

    # ---- Compute MVBS
    # compute_MVBS expects a variable named "Sv"
    # No product level is assigned because at present compute_MVBS drops the lat/lon data associated with the input Sv dataset
    ds = ds.rename_vars(name_dict={"Sv": "Sv_unmasked", "Sv_ch0": "Sv"})
    mvbs_ds = ep.commongrid.compute_MVBS(ds, range_meter_bin=30, ping_time_bin='1min')
    _absence_test(mvbs_ds)
Loading

0 comments on commit e9a5d86

Please sign in to comment.