Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expose func arg and skip_na to compute_MVBS users #1269

Merged
14 changes: 12 additions & 2 deletions echopype/commongrid/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ def compute_MVBS(
ds_Sv: xr.Dataset,
range_var: Literal["echo_range", "depth"] = "echo_range",
range_bin: str = "20m",
ping_time_bin: str = "20S",
ping_time_bin: str = "20s",
method="map-reduce",
skipna=True,
closed: Literal["left", "right"] = "left",
**flox_kwargs,
):
Expand All @@ -56,12 +57,15 @@ def compute_MVBS(
``depth`` as a data variable.
range_bin : str, default '20m'
bin size along ``echo_range`` or ``depth`` in meters.
ping_time_bin : str, default '20S'
ping_time_bin : str, default '20s'
bin size along ``ping_time``
method: str, default 'map-reduce'
The flox strategy for reduction of dask arrays only.
See flox `documentation <https://flox.readthedocs.io/en/latest/implementation.html>`_
for more details.
skipna: bool, default True
If true, mean function skips NaN values.
Else, mean function includes NaN values.
ctuguinay marked this conversation as resolved.
Show resolved Hide resolved
closed: {'left', 'right'}, default 'left'
Which side of bin interval is closed.
**flox_kwargs
Expand Down Expand Up @@ -105,6 +109,7 @@ def compute_MVBS(
ping_interval,
range_var=range_var,
method=method,
skipna=skipna,
**flox_kwargs,
)

Expand Down Expand Up @@ -268,6 +273,7 @@ def compute_NASC(
range_bin: str = "10m",
dist_bin: str = "0.5nmi",
method: str = "map-reduce",
skipna=True,
closed: Literal["left", "right"] = "left",
**flox_kwargs,
) -> xr.Dataset:
Expand All @@ -287,6 +293,9 @@ def compute_NASC(
The flox strategy for reduction of dask arrays only.
See flox `documentation <https://flox.readthedocs.io/en/latest/implementation.html>`_
for more details.
skipna: bool, default True
If true, mean function skips NaN values.
Else, mean function includes NaN values.
ctuguinay marked this conversation as resolved.
Show resolved Hide resolved
closed: {'left', 'right'}, default 'left'
Which side of bin interval is closed.
**flox_kwargs
Expand Down Expand Up @@ -357,6 +366,7 @@ def compute_NASC(
range_interval,
dist_interval,
method=method,
skipna=skipna,
**flox_kwargs,
)

Expand Down
33 changes: 30 additions & 3 deletions echopype/commongrid/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def compute_raw_MVBS(
ping_interval: Union[pd.IntervalIndex, np.ndarray],
range_var: Literal["echo_range", "depth"] = "echo_range",
method="map-reduce",
skipna=True,
**flox_kwargs,
):
"""
Expand All @@ -45,6 +46,9 @@ def compute_raw_MVBS(
The flox strategy for reduction of dask arrays only.
See flox `documentation <https://flox.readthedocs.io/en/latest/implementation.html>`_
for more details.
skipna: bool, default True
If true, mean function skips NaN values.
Else, mean function includes NaN values.
ctuguinay marked this conversation as resolved.
Show resolved Hide resolved
**flox_kwargs
Additional keyword arguments to be passed
to flox reduction function.
Expand All @@ -65,6 +69,8 @@ def compute_raw_MVBS(
x_var=x_var,
range_var=range_var,
method=method,
func="nanmean" if skipna else "mean",
skipna=skipna,
**flox_kwargs,
)

Expand All @@ -80,6 +86,7 @@ def compute_raw_NASC(
range_interval: Union[pd.IntervalIndex, np.ndarray],
dist_interval: Union[pd.IntervalIndex, np.ndarray],
method="map-reduce",
skipna=True,
**flox_kwargs,
):
"""
Expand All @@ -100,6 +107,9 @@ def compute_raw_NASC(
The flox strategy for reduction of dask arrays only.
See flox `documentation <https://flox.readthedocs.io/en/latest/implementation.html>`_
for more details.
skipna: bool, default True
If true, mean function skips NaN values.
Else, mean function includes NaN values.
ctuguinay marked this conversation as resolved.
Show resolved Hide resolved
**flox_kwargs
Additional keyword arguments to be passed
to flox reduction function.
Expand All @@ -126,6 +136,8 @@ def compute_raw_NASC(
x_var=x_var,
range_var=range_var,
method=method,
func="nanmean" if skipna else "mean",
skipna=skipna,
**flox_kwargs,
)

Expand All @@ -136,6 +148,7 @@ def compute_raw_NASC(
ds_Sv["ping_time"],
ds_Sv[x_var],
func="nanmean",
skipna=True,
expected_groups=(dist_interval),
isbin=True,
method=method,
Expand All @@ -154,7 +167,8 @@ def compute_raw_NASC(
h_mean_denom = xarray_reduce(
da_denom,
ds_Sv[x_var],
func="sum",
func="nansum",
skipna=True,
expected_groups=(dist_interval),
isbin=[True],
method=method,
Expand All @@ -165,7 +179,8 @@ def compute_raw_NASC(
ds_Sv["channel"],
ds_Sv[x_var],
ds_Sv[range_var].isel(**{range_dim: slice(0, -1)}),
func="sum",
func="nansum",
skipna=True,
expected_groups=(None, dist_interval, range_interval),
isbin=[False, True, True],
method=method,
Expand Down Expand Up @@ -480,6 +495,8 @@ def _groupby_x_along_channels(
x_var: Literal["ping_time", "distance_nmi"] = "ping_time",
range_var: Literal["echo_range", "depth"] = "echo_range",
method: str = "map-reduce",
func: str = "nanmean",
skipna: bool = True,
**flox_kwargs,
) -> xr.Dataset:
"""
Expand Down Expand Up @@ -517,6 +534,15 @@ def _groupby_x_along_channels(
The flox strategy for reduction of dask arrays only.
See flox `documentation <https://flox.readthedocs.io/en/latest/implementation.html>`_
for more details.
func: str, default 'nanmean'
The aggregation function used for reducing the data array.
By default, 'nanmean' is used. Other options can be found in the flox `documentation
<https://flox.readthedocs.io/en/latest/generated/flox.xarray.xarray_reduce.html>`_.
skipna: bool, default True
If true, aggregation function skips NaN values.
Else, aggregation function includes NaN values.
Note that if ``func`` is set to 'mean' and ``skipna`` is set to True, then aggregation
will have the same behavior as if func is set to 'nanmean'.
**flox_kwargs
Additional keyword arguments to be passed
to flox reduction function.
Expand Down Expand Up @@ -548,10 +574,11 @@ def _groupby_x_along_channels(
ds_Sv["channel"],
ds_Sv[x_var],
ds_Sv[range_var],
func="nanmean",
expected_groups=(None, x_interval, range_interval),
isbin=[False, True, True],
method=method,
func=func,
skipna=skipna,
**flox_kwargs,
)
return sv_mean
15 changes: 8 additions & 7 deletions echopype/tests/commongrid/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def mock_Sv_dataset_irregular(
# Sprinkle nans around echo_range
for pos in mock_nan_ilocs:
ds_Sv["echo_range"][pos] = np.nan
ds_Sv["Sv"][pos] = np.nan
return ds_Sv


Expand Down Expand Up @@ -201,7 +202,7 @@ def mock_nasc_array_regular(mock_Sv_dataset_regular, mock_parameters):
dist_bin = 0.5
range_bin = 2
channel_len = mock_parameters["channel_len"]
expected_nasc_val = _get_expected_nasc_val(ds_Sv, dist_bin, range_bin, channel_len)
expected_nasc_val = _get_expected_nasc_val_nanmean(ds_Sv, dist_bin, range_bin, channel_len)

return expected_nasc_val

Expand Down Expand Up @@ -237,7 +238,7 @@ def mock_nasc_array_irregular(mock_Sv_dataset_irregular, mock_parameters):
dist_bin = 0.5
range_bin = 2
channel_len = mock_parameters["channel_len"]
expected_nasc_val = _get_expected_nasc_val(ds_Sv, dist_bin, range_bin, channel_len)
expected_nasc_val = _get_expected_nasc_val_nanmean(ds_Sv, dist_bin, range_bin, channel_len)

return expected_nasc_val

Expand Down Expand Up @@ -463,12 +464,12 @@ def mock_Sv_dataset_NASC(mock_parameters, random_number_generator):


# Helper functions to generate mock Sv and MVBS dataset
def _get_expected_nasc_val(
def _get_expected_nasc_val_nanmean(
ds_Sv: xr.Dataset, dist_bin: str, range_bin: float, channel_len: int = 2
) -> np.ndarray:
"""
Helper functions to generate expected NASC outputs from mock Sv dataset
by brute-force looping and compute the mean
by brute-force looping and compute the nanmean

Parameters
----------
Expand Down Expand Up @@ -500,7 +501,7 @@ def _get_expected_nasc_val(
sv = ds_Sv["Sv"].pipe(ep.utils.compute._log2lin)

# Compute sv mean
sv_mean = _brute_mean_reduce_3d(
sv_mean = _brute_nanmean_reduce_3d(
sv, ds_Sv, "depth", "distance_nmi", channel_len, dist_interval, range_interval
)

Expand Down Expand Up @@ -544,7 +545,7 @@ def _get_expected_nasc_val(
return sv_mean * h_mean * 4 * np.pi * 1852**2


def _brute_mean_reduce_3d(
def _brute_nanmean_reduce_3d(
sv: xr.DataArray,
ds_Sv: xr.Dataset,
range_var: Literal["echo_range", "depth"],
Expand Down Expand Up @@ -610,7 +611,7 @@ def _brute_mean_reduce_3d(
if 0 in sv_tmp.shape:
mean_vals[ch_idx, x_idx, r_idx] = np.nan
else:
mean_vals[ch_idx, x_idx, r_idx] = np.mean(sv_tmp)
mean_vals[ch_idx, x_idx, r_idx] = np.nanmean(sv_tmp)
return mean_vals


Expand Down
36 changes: 35 additions & 1 deletion echopype/tests/commongrid/test_commongrid_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,11 +438,45 @@ def test_compute_NASC_values(request, er_type):
ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular")
expected_nasc = request.getfixturevalue("mock_nasc_array_irregular")

ds_NASC = ep.commongrid.compute_NASC(ds_Sv, range_bin=range_bin, dist_bin=dist_bin)
ds_NASC = ep.commongrid.compute_NASC(ds_Sv, range_bin=range_bin, dist_bin=dist_bin, skipna=True)

assert ds_NASC.NASC.shape == expected_nasc.shape
# Floating digits need to check with all close not equal
# Compare the values of the MVBS array with the expected values
assert np.allclose(
ds_NASC.NASC.values, expected_nasc.values, atol=1e-10, rtol=1e-10, equal_nan=True
)


@pytest.mark.integration
@pytest.mark.parametrize(
("operation","skipna"),
[
("MVBS", True),
("NASC", False),
],
)
def test_compute_MVBS_NASC_skipna_nan_and_non_nan_values(request, operation, skipna):
# Create subset dataset with 2 channels, 2 ping times, 2 depth values:

# Get fixture for irregular Sv
ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular")
# Already has 2 channels, so subset for only ping time and range sample
subset_ds_Sv = ds_Sv.isel(ping_time=slice(0,2), range_sample=slice(0,2))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think having only 2 depth values makes the mock dataset too small for the test to actually check whether the number and location of NaN elements at the output of NASC or MVBS computation is correct. I think you can select 2 of the pings, with one of them having some NaN elements and the other doesn't, but retain all elements along echo_range. This dataarray size should be manageable but would be a lot more realistic.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. I'll make these changes.


# Compute MVBS / Compute NASC
if operation == "MVBS":
da = ep.commongrid.compute_MVBS(subset_ds_Sv, skipna=skipna)["Sv"]
else:
da = ep.commongrid.compute_NASC(subset_ds_Sv, skipna=skipna)["NASC"]

# Check that da is 2 values: 1 value for each channel
assert da.shape == (2, 1, 1)

# Check for appropriate NaN/non-NaN values
if skipna:
assert not np.isnan(da.isel(channel=0).squeeze().data)
assert not np.isnan(da.isel(channel=1).squeeze().data)
else:
assert np.isnan(da.isel(channel=0).squeeze().data)
assert not np.isnan(da.isel(channel=1).squeeze().data)