From 5f079fbdea81dd0c4c83af6a7b41d3ffa708ee0b Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Thu, 29 Feb 2024 17:08:09 +0000 Subject: [PATCH 01/15] add func arg --- echopype/commongrid/api.py | 6 ++++++ echopype/commongrid/utils.py | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index 4bac09813..5217cdf07 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -33,6 +33,7 @@ def compute_MVBS( range_bin: str = "20m", ping_time_bin: str = "20S", method="map-reduce", + func="nanmean", closed: Literal["left", "right"] = "left", **flox_kwargs, ): @@ -62,6 +63,10 @@ def compute_MVBS( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. + func: str, default 'nanmean' + The flox aggregation function used for reducing the data array. + By default, 'nanmean' is used. Other options can be found in the flox `documentation + `_. closed: {'left', 'right'}, default 'left' Which side of bin interval is closed. **flox_kwargs @@ -105,6 +110,7 @@ def compute_MVBS( ping_interval, range_var=range_var, method=method, + func=func, **flox_kwargs, ) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index 39d2cd62a..afc53e1dd 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -20,6 +20,7 @@ def compute_raw_MVBS( ping_interval: Union[pd.IntervalIndex, np.ndarray], range_var: Literal["echo_range", "depth"] = "echo_range", method="map-reduce", + func="nanmean", **flox_kwargs, ): """ @@ -45,6 +46,10 @@ def compute_raw_MVBS( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. + func: str, default 'nanmean' + The flox aggregation function used for reducing the data array. + By default, 'nanmean' is used. Other options can be found in the flox `documentation + `_. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. @@ -65,6 +70,7 @@ def compute_raw_MVBS( x_var=x_var, range_var=range_var, method=method, + func=func, **flox_kwargs, ) @@ -480,6 +486,7 @@ def _groupby_x_along_channels( x_var: Literal["ping_time", "distance_nmi"] = "ping_time", range_var: Literal["echo_range", "depth"] = "echo_range", method: str = "map-reduce", + func: str = "nanmean", **flox_kwargs, ) -> xr.Dataset: """ @@ -517,6 +524,10 @@ def _groupby_x_along_channels( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. + func: str, default 'nanmean' + The aggregation function used for reducing the data array. + By default, 'nanmean' is used. Other options can be found in the flox `documentation + `_. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. @@ -552,6 +563,7 @@ def _groupby_x_along_channels( expected_groups=(None, x_interval, range_interval), isbin=[False, True, True], method=method, + func=func, **flox_kwargs, ) return sv_mean From 5bf588bd90da76b7785ddc218788ec6525cbdc47 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Thu, 29 Feb 2024 17:13:31 +0000 Subject: [PATCH 02/15] remove extra func --- echopype/commongrid/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index afc53e1dd..f61662b0c 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -559,7 +559,6 @@ def _groupby_x_along_channels( ds_Sv["channel"], ds_Sv[x_var], ds_Sv[range_var], - func="nanmean", expected_groups=(None, x_interval, range_interval), isbin=[False, True, True], method=method, From 7c21a37663284a6960b658617670d7e04bab1e3e Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Thu, 29 Feb 2024 22:22:54 +0000 Subject: [PATCH 03/15] update test to include mean --- .../tests/commongrid/test_commongrid_api.py | 73 ++++++++++++++----- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index 6e3a84385..ed229ae32 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd from flox.xarray import xarray_reduce +import xarray as xr import echopype as ep from echopype.consolidate import add_location, add_depth from echopype.commongrid.utils import ( @@ -341,14 +342,23 @@ def test_compute_MVBS_range_output(request, er_type): @pytest.mark.integration @pytest.mark.parametrize( - ("er_type"), + ("er_type", "func", "add_nan"), [ - ("regular"), - ("irregular"), + ("regular", "nanmean", "no_add_nan"), + ("irregular", "nanmean", "no_add_nan"), + ("regular", "mean", "no_add_nan"), + ("irregular", "mean", "no_add_nan"), + ("regular", "nanmean", "add_nan"), + ("regular", "mean", "add_nan"), + ("irregular", "nanmean", "add_nan"), + ("irregular", "mean", "add_nan"), ], ) -def test_compute_MVBS_values(request, er_type): - """Tests for the values of compute_MVBS on regular and irregular data.""" +def test_compute_MVBS_values(request, er_type, func, add_nan): + """ + Tests for the values of compute_MVBS on regular vsirregular, func as nanmean vs mean, + and added NaN vs no added Nan data. + """ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: """Go through and figure out nan values in result""" @@ -401,18 +411,47 @@ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular") expected_mvbs = request.getfixturevalue("mock_mvbs_array_irregular") - ds_MVBS = ep.commongrid.compute_MVBS(ds_Sv, range_bin=range_bin, ping_time_bin=ping_time_bin) - - expected_outputs = _parse_nans(ds_MVBS, ds_Sv) - - assert ds_MVBS.Sv.shape == expected_mvbs.shape - # Floating digits need to check with all close not equal - # Compare the values of the MVBS array with the expected values - assert np.allclose(ds_MVBS.Sv.values, expected_mvbs, atol=1e-10, rtol=1e-10, equal_nan=True) - - # Ensures that the computation of MVBS takes doesn't take into account NaN values - # that are sporadically placed in the echo_range values - assert np.array_equal(np.isnan(ds_MVBS.Sv.values), expected_outputs) + # Check to see if MVBS matches request fixture arrays + if add_nan == "no_add_nan": + # Compute MVBS + ds_MVBS = ep.commongrid.compute_MVBS( + ds_Sv, + range_bin=range_bin, + ping_time_bin=ping_time_bin, + func=func, + skipna=False + ) + + # Compute expected outputs + expected_outputs = _parse_nans(ds_MVBS, ds_Sv) + + assert ds_MVBS.Sv.shape == expected_mvbs.shape + # Floating digits need to check with all close not equal + # Compare the values of the MVBS array with the expected values + assert np.allclose(ds_MVBS.Sv.values, expected_mvbs, atol=1e-10, rtol=1e-10, equal_nan=True) + + # Ensures that the computation of MVBS takes doesn't take into account NaN values + # that are sporadically placed in the echo_range values + assert np.array_equal(np.isnan(ds_MVBS.Sv.values), expected_outputs) + + # Check appropriate aggregate function behavior when NaNs are added to first channel + elif add_nan == "add_nan": + # Add 5 NaN values to ds_Sv and compute MVBS + ds_Sv["Sv"][0, 0, 0:5] = np.nan + ds_MVBS = ep.commongrid.compute_MVBS( + ds_Sv, + range_bin=range_bin, + ping_time_bin=ping_time_bin, + func=func, + skipna=False + ) + if func == "mean": + # Ensure that the 5 NaN Sv values, now projected onto the regridded dataset, turn into 2 NaN values in the case + # where func is mean. + assert np.array_equal(ds_MVBS["Sv"][0, 0, 0:2].data, np.array([np.nan, np.nan]), equal_nan=True) + elif func == "nanmean": + # Ensure that all values in regridded are non-NaN when func is nanmean + assert not np.isnan(ds_MVBS["Sv"][0, 0, :].data).any() @pytest.mark.integration From 82a072aa792adbd56daf5dfe4fcf286af28224c0 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 5 Mar 2024 20:34:34 +0000 Subject: [PATCH 04/15] expose skipna flox arg --- echopype/commongrid/api.py | 7 +++++++ echopype/commongrid/utils.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index 5217cdf07..0677c3af9 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -34,6 +34,7 @@ def compute_MVBS( ping_time_bin: str = "20S", method="map-reduce", func="nanmean", + skipna=True, closed: Literal["left", "right"] = "left", **flox_kwargs, ): @@ -67,6 +68,11 @@ def compute_MVBS( The flox aggregation function used for reducing the data array. By default, 'nanmean' is used. Other options can be found in the flox `documentation `_. + skipna: bool, default True + If true, aggregation function skips NaN values. + Else, aggregation function includes NaN values. + Note that if ``func`` is set to 'mean' and ``skipna`` is set to True, then aggregation + will have the same behavior as if func is set to 'nanmean'. closed: {'left', 'right'}, default 'left' Which side of bin interval is closed. **flox_kwargs @@ -111,6 +117,7 @@ def compute_MVBS( range_var=range_var, method=method, func=func, + skipna=skipna, **flox_kwargs, ) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index f61662b0c..d56fc315d 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -21,6 +21,7 @@ def compute_raw_MVBS( range_var: Literal["echo_range", "depth"] = "echo_range", method="map-reduce", func="nanmean", + skipna=True, **flox_kwargs, ): """ @@ -50,6 +51,11 @@ def compute_raw_MVBS( The flox aggregation function used for reducing the data array. By default, 'nanmean' is used. Other options can be found in the flox `documentation `_. + skipna: bool, default True + If true, aggregation function skips NaN values. + Else, aggregation function includes NaN values. + Note that if ``func`` is set to 'mean' and ``skipna`` is set to True, then aggregation + will have the same behavior as if func is set to 'nanmean'. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. @@ -71,6 +77,7 @@ def compute_raw_MVBS( range_var=range_var, method=method, func=func, + skipna=skipna, **flox_kwargs, ) @@ -487,6 +494,7 @@ def _groupby_x_along_channels( range_var: Literal["echo_range", "depth"] = "echo_range", method: str = "map-reduce", func: str = "nanmean", + skipna: bool = True, **flox_kwargs, ) -> xr.Dataset: """ @@ -528,6 +536,11 @@ def _groupby_x_along_channels( The aggregation function used for reducing the data array. By default, 'nanmean' is used. Other options can be found in the flox `documentation `_. + skipna: bool, default True + If true, aggregation function skips NaN values. + Else, aggregation function includes NaN values. + Note that if ``func`` is set to 'mean' and ``skipna`` is set to True, then aggregation + will have the same behavior as if func is set to 'nanmean'. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. @@ -563,6 +576,7 @@ def _groupby_x_along_channels( isbin=[False, True, True], method=method, func=func, + skipna=skipna, **flox_kwargs, ) return sv_mean From 2a98ef45b2885c3180795ad6e8b0b196a756a1d0 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 5 Mar 2024 21:21:59 +0000 Subject: [PATCH 05/15] add additional check --- echopype/tests/commongrid/test_commongrid_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index ed229ae32..22ef30386 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -449,6 +449,7 @@ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: # Ensure that the 5 NaN Sv values, now projected onto the regridded dataset, turn into 2 NaN values in the case # where func is mean. assert np.array_equal(ds_MVBS["Sv"][0, 0, 0:2].data, np.array([np.nan, np.nan]), equal_nan=True) + assert np.sum(np.isnan(ds_MVBS["Sv"][0, 0, :].data)) == 2 elif func == "nanmean": # Ensure that all values in regridded are non-NaN when func is nanmean assert not np.isnan(ds_MVBS["Sv"][0, 0, :].data).any() From 394254c3da8c10bdd511d88035081463f3dca42f Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Wed, 27 Mar 2024 21:04:40 +0000 Subject: [PATCH 06/15] hide func and keep skipna exposed --- echopype/commongrid/api.py | 12 +----- echopype/commongrid/utils.py | 13 ++----- .../tests/commongrid/test_commongrid_api.py | 38 +++++++++---------- 3 files changed, 23 insertions(+), 40 deletions(-) diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index 0677c3af9..20f6a16df 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -33,7 +33,6 @@ def compute_MVBS( range_bin: str = "20m", ping_time_bin: str = "20S", method="map-reduce", - func="nanmean", skipna=True, closed: Literal["left", "right"] = "left", **flox_kwargs, @@ -64,15 +63,9 @@ def compute_MVBS( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. - func: str, default 'nanmean' - The flox aggregation function used for reducing the data array. - By default, 'nanmean' is used. Other options can be found in the flox `documentation - `_. skipna: bool, default True - If true, aggregation function skips NaN values. - Else, aggregation function includes NaN values. - Note that if ``func`` is set to 'mean' and ``skipna`` is set to True, then aggregation - will have the same behavior as if func is set to 'nanmean'. + If true, mean function skips NaN values. + Else, mean function includes NaN values. closed: {'left', 'right'}, default 'left' Which side of bin interval is closed. **flox_kwargs @@ -116,7 +109,6 @@ def compute_MVBS( ping_interval, range_var=range_var, method=method, - func=func, skipna=skipna, **flox_kwargs, ) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index d56fc315d..59eef0e08 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -20,7 +20,6 @@ def compute_raw_MVBS( ping_interval: Union[pd.IntervalIndex, np.ndarray], range_var: Literal["echo_range", "depth"] = "echo_range", method="map-reduce", - func="nanmean", skipna=True, **flox_kwargs, ): @@ -47,15 +46,9 @@ def compute_raw_MVBS( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. - func: str, default 'nanmean' - The flox aggregation function used for reducing the data array. - By default, 'nanmean' is used. Other options can be found in the flox `documentation - `_. skipna: bool, default True - If true, aggregation function skips NaN values. - Else, aggregation function includes NaN values. - Note that if ``func`` is set to 'mean' and ``skipna`` is set to True, then aggregation - will have the same behavior as if func is set to 'nanmean'. + If true, mean function skips NaN values. + Else, mean function includes NaN values. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. @@ -76,7 +69,7 @@ def compute_raw_MVBS( x_var=x_var, range_var=range_var, method=method, - func=func, + func="nanmean" if skipna else "mean", skipna=skipna, **flox_kwargs, ) diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index 22ef30386..9a3e23369 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -342,21 +342,21 @@ def test_compute_MVBS_range_output(request, er_type): @pytest.mark.integration @pytest.mark.parametrize( - ("er_type", "func", "add_nan"), + ("er_type", "skipna", "add_nan"), [ - ("regular", "nanmean", "no_add_nan"), - ("irregular", "nanmean", "no_add_nan"), - ("regular", "mean", "no_add_nan"), - ("irregular", "mean", "no_add_nan"), - ("regular", "nanmean", "add_nan"), - ("regular", "mean", "add_nan"), - ("irregular", "nanmean", "add_nan"), - ("irregular", "mean", "add_nan"), + ("regular", True, "no_add_nan"), + ("irregular", True, "no_add_nan"), + ("regular", False, "no_add_nan"), + ("irregular", False, "no_add_nan"), + ("regular", True, "add_nan"), + ("regular", False, "add_nan"), + ("irregular", True, "add_nan"), + ("irregular", False, "add_nan"), ], ) -def test_compute_MVBS_values(request, er_type, func, add_nan): +def test_compute_MVBS_values(request, er_type, skipna, add_nan): """ - Tests for the values of compute_MVBS on regular vsirregular, func as nanmean vs mean, + Tests for the values of compute_MVBS on regular vsirregular, skipna as True vs False, and added NaN vs no added Nan data. """ @@ -418,8 +418,7 @@ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: ds_Sv, range_bin=range_bin, ping_time_bin=ping_time_bin, - func=func, - skipna=False + skipna=skipna ) # Compute expected outputs @@ -434,7 +433,7 @@ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: # that are sporadically placed in the echo_range values assert np.array_equal(np.isnan(ds_MVBS.Sv.values), expected_outputs) - # Check appropriate aggregate function behavior when NaNs are added to first channel + # Check appropriate output when NaNs are added to first channel elif add_nan == "add_nan": # Add 5 NaN values to ds_Sv and compute MVBS ds_Sv["Sv"][0, 0, 0:5] = np.nan @@ -442,16 +441,15 @@ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: ds_Sv, range_bin=range_bin, ping_time_bin=ping_time_bin, - func=func, - skipna=False + skipna=skipna ) - if func == "mean": + if not skipna: # Ensure that the 5 NaN Sv values, now projected onto the regridded dataset, turn into 2 NaN values in the case - # where func is mean. + # where skipna is False. assert np.array_equal(ds_MVBS["Sv"][0, 0, 0:2].data, np.array([np.nan, np.nan]), equal_nan=True) assert np.sum(np.isnan(ds_MVBS["Sv"][0, 0, :].data)) == 2 - elif func == "nanmean": - # Ensure that all values in regridded are non-NaN when func is nanmean + else: + # Ensure that all values in regridded are non-NaN when skipna is True. assert not np.isnan(ds_MVBS["Sv"][0, 0, :].data).any() From ebd8ef5e452dd7b5e04665434d63d168fcdb575c Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Mon, 22 Apr 2024 16:52:07 +0000 Subject: [PATCH 07/15] add nan options for NASC, revert compute MVBS test, create test for smaller subset of ds Sv --- echopype/commongrid/api.py | 9 +- echopype/commongrid/utils.py | 13 ++- .../tests/commongrid/test_commongrid_api.py | 110 +++++++++--------- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index 20f6a16df..33f889c26 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -31,7 +31,7 @@ def compute_MVBS( ds_Sv: xr.Dataset, range_var: Literal["echo_range", "depth"] = "echo_range", range_bin: str = "20m", - ping_time_bin: str = "20S", + ping_time_bin: str = "20s", method="map-reduce", skipna=True, closed: Literal["left", "right"] = "left", @@ -57,7 +57,7 @@ def compute_MVBS( ``depth`` as a data variable. range_bin : str, default '20m' bin size along ``echo_range`` or ``depth`` in meters. - ping_time_bin : str, default '20S' + ping_time_bin : str, default '20s' bin size along ``ping_time`` method: str, default 'map-reduce' The flox strategy for reduction of dask arrays only. @@ -273,6 +273,7 @@ def compute_NASC( range_bin: str = "10m", dist_bin: str = "0.5nmi", method: str = "map-reduce", + skipna=True, closed: Literal["left", "right"] = "left", **flox_kwargs, ) -> xr.Dataset: @@ -292,6 +293,9 @@ def compute_NASC( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. + skipna: bool, default True + If true, mean function skips NaN values. + Else, mean function includes NaN values. closed: {'left', 'right'}, default 'left' Which side of bin interval is closed. **flox_kwargs @@ -362,6 +366,7 @@ def compute_NASC( range_interval, dist_interval, method=method, + skipna=skipna, **flox_kwargs, ) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index 59eef0e08..d541233d6 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -86,6 +86,7 @@ def compute_raw_NASC( range_interval: Union[pd.IntervalIndex, np.ndarray], dist_interval: Union[pd.IntervalIndex, np.ndarray], method="map-reduce", + skipna=True, **flox_kwargs, ): """ @@ -106,6 +107,9 @@ def compute_raw_NASC( The flox strategy for reduction of dask arrays only. See flox `documentation `_ for more details. + skipna: bool, default True + If true, mean function skips NaN values. + Else, mean function includes NaN values. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. @@ -132,6 +136,8 @@ def compute_raw_NASC( x_var=x_var, range_var=range_var, method=method, + func="nanmean" if skipna else "mean", + skipna=skipna, **flox_kwargs, ) @@ -142,6 +148,7 @@ def compute_raw_NASC( ds_Sv["ping_time"], ds_Sv[x_var], func="nanmean", + skipna=True, expected_groups=(dist_interval), isbin=True, method=method, @@ -160,7 +167,8 @@ def compute_raw_NASC( h_mean_denom = xarray_reduce( da_denom, ds_Sv[x_var], - func="sum", + func="nansum", + skipna=True, expected_groups=(dist_interval), isbin=[True], method=method, @@ -171,7 +179,8 @@ def compute_raw_NASC( ds_Sv["channel"], ds_Sv[x_var], ds_Sv[range_var].isel(**{range_dim: slice(0, -1)}), - func="sum", + func="nansum", + skipna=True, expected_groups=(None, dist_interval, range_interval), isbin=[False, True, True], method=method, diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index 9a3e23369..faf4ad6ea 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -3,7 +3,6 @@ import numpy as np import pandas as pd from flox.xarray import xarray_reduce -import xarray as xr import echopype as ep from echopype.consolidate import add_location, add_depth from echopype.commongrid.utils import ( @@ -342,23 +341,14 @@ def test_compute_MVBS_range_output(request, er_type): @pytest.mark.integration @pytest.mark.parametrize( - ("er_type", "skipna", "add_nan"), + ("er_type"), [ - ("regular", True, "no_add_nan"), - ("irregular", True, "no_add_nan"), - ("regular", False, "no_add_nan"), - ("irregular", False, "no_add_nan"), - ("regular", True, "add_nan"), - ("regular", False, "add_nan"), - ("irregular", True, "add_nan"), - ("irregular", False, "add_nan"), + ("regular"), + ("irregular"), ], ) -def test_compute_MVBS_values(request, er_type, skipna, add_nan): - """ - Tests for the values of compute_MVBS on regular vsirregular, skipna as True vs False, - and added NaN vs no added Nan data. - """ +def test_compute_MVBS_values(request, er_type): + """Tests for the values of compute_MVBS on regular and irregular data.""" def _parse_nans(mvbs, ds_Sv) -> np.ndarray: """Go through and figure out nan values in result""" @@ -411,46 +401,18 @@ def _parse_nans(mvbs, ds_Sv) -> np.ndarray: ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular") expected_mvbs = request.getfixturevalue("mock_mvbs_array_irregular") - # Check to see if MVBS matches request fixture arrays - if add_nan == "no_add_nan": - # Compute MVBS - ds_MVBS = ep.commongrid.compute_MVBS( - ds_Sv, - range_bin=range_bin, - ping_time_bin=ping_time_bin, - skipna=skipna - ) - - # Compute expected outputs - expected_outputs = _parse_nans(ds_MVBS, ds_Sv) - - assert ds_MVBS.Sv.shape == expected_mvbs.shape - # Floating digits need to check with all close not equal - # Compare the values of the MVBS array with the expected values - assert np.allclose(ds_MVBS.Sv.values, expected_mvbs, atol=1e-10, rtol=1e-10, equal_nan=True) - - # Ensures that the computation of MVBS takes doesn't take into account NaN values - # that are sporadically placed in the echo_range values - assert np.array_equal(np.isnan(ds_MVBS.Sv.values), expected_outputs) - - # Check appropriate output when NaNs are added to first channel - elif add_nan == "add_nan": - # Add 5 NaN values to ds_Sv and compute MVBS - ds_Sv["Sv"][0, 0, 0:5] = np.nan - ds_MVBS = ep.commongrid.compute_MVBS( - ds_Sv, - range_bin=range_bin, - ping_time_bin=ping_time_bin, - skipna=skipna - ) - if not skipna: - # Ensure that the 5 NaN Sv values, now projected onto the regridded dataset, turn into 2 NaN values in the case - # where skipna is False. - assert np.array_equal(ds_MVBS["Sv"][0, 0, 0:2].data, np.array([np.nan, np.nan]), equal_nan=True) - assert np.sum(np.isnan(ds_MVBS["Sv"][0, 0, :].data)) == 2 - else: - # Ensure that all values in regridded are non-NaN when skipna is True. - assert not np.isnan(ds_MVBS["Sv"][0, 0, :].data).any() + ds_MVBS = ep.commongrid.compute_MVBS(ds_Sv, range_bin=range_bin, ping_time_bin=ping_time_bin) + + expected_outputs = _parse_nans(ds_MVBS, ds_Sv) + + assert ds_MVBS.Sv.shape == expected_mvbs.shape + # Floating digits need to check with all close not equal + # Compare the values of the MVBS array with the expected values + assert np.allclose(ds_MVBS.Sv.values, expected_mvbs, atol=1e-10, rtol=1e-10, equal_nan=True) + + # Ensures that the computation of MVBS takes doesn't take into account NaN values + # that are sporadically placed in the echo_range values + assert np.array_equal(np.isnan(ds_MVBS.Sv.values), expected_outputs) @pytest.mark.integration @@ -484,3 +446,41 @@ def test_compute_NASC_values(request, er_type): assert np.allclose( ds_NASC.NASC.values, expected_nasc.values, atol=1e-10, rtol=1e-10, equal_nan=True ) + + +@pytest.mark.integration +@pytest.mark.parametrize( + ("operation","skipna"), + [ + ("MVBS", True), + ("NASC", False), + ], +) +def test_compute_MVBS_NASC_skipna_nan_and_non_nan_values(request, operation, skipna): + # Create subset dataset with 2 channels, 2 ping times, 2 depth values: + + # Get fixture for irregular Sv + ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular") + # Already has 2 channels, so subset for only ping time and range sample + subset_ds_Sv = ds_Sv.isel(ping_time=slice(0,2), range_sample=slice(0,2)) + # Set the 1 NaN echo range value to non-NaN value + subset_ds_Sv["echo_range"][0, 1, 1] = subset_ds_Sv["echo_range"][0, 0, 1].data + # Set one of the first channel values to NaN + subset_ds_Sv["Sv"][0, 1, 1] = np.nan + + # Compute MVBS / Compute NASC + if operation == "MVBS": + da = ep.commongrid.compute_MVBS(subset_ds_Sv, skipna=skipna)["Sv"] + else: + da = ep.commongrid.compute_NASC(subset_ds_Sv, skipna=skipna)["NASC"] + + # Check that da is 2 values: 1 value for each channel + assert da.shape == (2, 1, 1) + + # Check for appropriate NaN/non-NaN values + if skipna: + assert not np.isnan(da.isel(channel=0).squeeze().data) + assert not np.isnan(da.isel(channel=1).squeeze().data) + else: + assert np.isnan(da.isel(channel=0).squeeze().data) + assert not np.isnan(da.isel(channel=1).squeeze().data) \ No newline at end of file From 8c10add9bf3bdf4d5238e0e7bcfbfeba5f53ab27 Mon Sep 17 00:00:00 2001 From: Wu-Jung Lee Date: Tue, 23 Apr 2024 06:54:51 -0700 Subject: [PATCH 08/15] fix mock_Sv_dataset_irregular so that mock Sv and echo_range have the same nan elements --- echopype/tests/commongrid/conftest.py | 15 ++++++++------- echopype/tests/commongrid/test_commongrid_api.py | 6 +----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/echopype/tests/commongrid/conftest.py b/echopype/tests/commongrid/conftest.py index 6e67f9f6d..f6b73d083 100644 --- a/echopype/tests/commongrid/conftest.py +++ b/echopype/tests/commongrid/conftest.py @@ -162,6 +162,7 @@ def mock_Sv_dataset_irregular( # Sprinkle nans around echo_range for pos in mock_nan_ilocs: ds_Sv["echo_range"][pos] = np.nan + ds_Sv["Sv"][pos] = np.nan return ds_Sv @@ -201,7 +202,7 @@ def mock_nasc_array_regular(mock_Sv_dataset_regular, mock_parameters): dist_bin = 0.5 range_bin = 2 channel_len = mock_parameters["channel_len"] - expected_nasc_val = _get_expected_nasc_val(ds_Sv, dist_bin, range_bin, channel_len) + expected_nasc_val = _get_expected_nasc_val_nanmean(ds_Sv, dist_bin, range_bin, channel_len) return expected_nasc_val @@ -237,7 +238,7 @@ def mock_nasc_array_irregular(mock_Sv_dataset_irregular, mock_parameters): dist_bin = 0.5 range_bin = 2 channel_len = mock_parameters["channel_len"] - expected_nasc_val = _get_expected_nasc_val(ds_Sv, dist_bin, range_bin, channel_len) + expected_nasc_val = _get_expected_nasc_val_nanmean(ds_Sv, dist_bin, range_bin, channel_len) return expected_nasc_val @@ -463,12 +464,12 @@ def mock_Sv_dataset_NASC(mock_parameters, random_number_generator): # Helper functions to generate mock Sv and MVBS dataset -def _get_expected_nasc_val( +def _get_expected_nasc_val_nanmean( ds_Sv: xr.Dataset, dist_bin: str, range_bin: float, channel_len: int = 2 ) -> np.ndarray: """ Helper functions to generate expected NASC outputs from mock Sv dataset - by brute-force looping and compute the mean + by brute-force looping and compute the nanmean Parameters ---------- @@ -500,7 +501,7 @@ def _get_expected_nasc_val( sv = ds_Sv["Sv"].pipe(ep.utils.compute._log2lin) # Compute sv mean - sv_mean = _brute_mean_reduce_3d( + sv_mean = _brute_nanmean_reduce_3d( sv, ds_Sv, "depth", "distance_nmi", channel_len, dist_interval, range_interval ) @@ -544,7 +545,7 @@ def _get_expected_nasc_val( return sv_mean * h_mean * 4 * np.pi * 1852**2 -def _brute_mean_reduce_3d( +def _brute_nanmean_reduce_3d( sv: xr.DataArray, ds_Sv: xr.Dataset, range_var: Literal["echo_range", "depth"], @@ -610,7 +611,7 @@ def _brute_mean_reduce_3d( if 0 in sv_tmp.shape: mean_vals[ch_idx, x_idx, r_idx] = np.nan else: - mean_vals[ch_idx, x_idx, r_idx] = np.mean(sv_tmp) + mean_vals[ch_idx, x_idx, r_idx] = np.nanmean(sv_tmp) return mean_vals diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index faf4ad6ea..f850f1d1e 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -438,7 +438,7 @@ def test_compute_NASC_values(request, er_type): ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular") expected_nasc = request.getfixturevalue("mock_nasc_array_irregular") - ds_NASC = ep.commongrid.compute_NASC(ds_Sv, range_bin=range_bin, dist_bin=dist_bin) + ds_NASC = ep.commongrid.compute_NASC(ds_Sv, range_bin=range_bin, dist_bin=dist_bin, skipna=True) assert ds_NASC.NASC.shape == expected_nasc.shape # Floating digits need to check with all close not equal @@ -463,10 +463,6 @@ def test_compute_MVBS_NASC_skipna_nan_and_non_nan_values(request, operation, ski ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular") # Already has 2 channels, so subset for only ping time and range sample subset_ds_Sv = ds_Sv.isel(ping_time=slice(0,2), range_sample=slice(0,2)) - # Set the 1 NaN echo range value to non-NaN value - subset_ds_Sv["echo_range"][0, 1, 1] = subset_ds_Sv["echo_range"][0, 0, 1].data - # Set one of the first channel values to NaN - subset_ds_Sv["Sv"][0, 1, 1] = np.nan # Compute MVBS / Compute NASC if operation == "MVBS": From be4c2b7d42091ca405fbab0dbb6e33f384651603 Mon Sep 17 00:00:00 2001 From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:01:14 -0700 Subject: [PATCH 09/15] edit docstring wording Co-authored-by: Wu-Jung Lee --- echopype/commongrid/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index 33f889c26..ea1c98ad1 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -64,8 +64,8 @@ def compute_MVBS( See flox `documentation `_ for more details. skipna: bool, default True - If true, mean function skips NaN values. - Else, mean function includes NaN values. + If true, the mean operation skips NaN values. + Else, the mean operation includes NaN values. closed: {'left', 'right'}, default 'left' Which side of bin interval is closed. **flox_kwargs From 1b8f21d9f0bbe1df92577809d9b333c726998a98 Mon Sep 17 00:00:00 2001 From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:01:24 -0700 Subject: [PATCH 10/15] edit docstring wording Co-authored-by: Wu-Jung Lee --- echopype/commongrid/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index ea1c98ad1..0b1a1eb58 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -294,8 +294,8 @@ def compute_NASC( See flox `documentation `_ for more details. skipna: bool, default True - If true, mean function skips NaN values. - Else, mean function includes NaN values. + If true, the mean operation skips NaN values. + Else, the mean operation includes NaN values. closed: {'left', 'right'}, default 'left' Which side of bin interval is closed. **flox_kwargs From db2d32f66dcca6a1c25111519b6fda38cf14a95e Mon Sep 17 00:00:00 2001 From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:01:37 -0700 Subject: [PATCH 11/15] edit docstring wording Co-authored-by: Wu-Jung Lee --- echopype/commongrid/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index d541233d6..5560b3fb8 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -47,8 +47,8 @@ def compute_raw_MVBS( See flox `documentation `_ for more details. skipna: bool, default True - If true, mean function skips NaN values. - Else, mean function includes NaN values. + If true, the mean operation skips NaN values. + Else, the mean operation includes NaN values. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. From 971b2f0e419daf1715e74307a79db6496613bc5a Mon Sep 17 00:00:00 2001 From: Caesar Tuguinay <87830138+ctuguinay@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:01:51 -0700 Subject: [PATCH 12/15] edit docstring wording Co-authored-by: Wu-Jung Lee --- echopype/commongrid/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index 5560b3fb8..d430f7ece 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -108,8 +108,8 @@ def compute_raw_NASC( See flox `documentation `_ for more details. skipna: bool, default True - If true, mean function skips NaN values. - Else, mean function includes NaN values. + If true, the mean operation skips NaN values. + Else, the mean operation includes NaN values. **flox_kwargs Additional keyword arguments to be passed to flox reduction function. From c83bc4aca72aec55f4b534648f6addda54359088 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 23 Apr 2024 21:24:24 +0000 Subject: [PATCH 13/15] write tests for full range sample and test case for nan coordinates; add warning where coordinates have NaNs --- echopype/commongrid/utils.py | 17 +++- .../tests/commongrid/test_commongrid_api.py | 94 +++++++++++++++---- 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index d430f7ece..8939c071d 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -61,7 +61,6 @@ def compute_raw_MVBS( # Set initial variables ds = xr.Dataset() x_var = "ping_time" - sv_mean = _groupby_x_along_channels( ds_Sv, range_interval, @@ -566,6 +565,22 @@ def _groupby_x_along_channels( # average should be done in linear domain sv = ds_Sv["Sv"].pipe(_log2lin) + # Check for any NaNs in the coordinate arrays: + named_arrays = { + x_var: ds_Sv[x_var].data, + range_var: ds_Sv[range_var].data, + } + aggregation_msg = ( + "Aggregation may be negatively impacted since Flox will not aggregate any " + "```Sv``` values that have corresponding NaN coordinate values. Consider handling " + "these values before calling your intended commongrid function." + ) + for array_name, array in named_arrays.items(): + if np.isnan(array).any(): + logging.warning( + f"The ```{array_name}``` coordinate array contain NaNs. {aggregation_msg}" + ) + # reduce along ping_time or distance_nmi # and echo_range or depth # by binning and averaging diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index f850f1d1e..75d3a4516 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd from flox.xarray import xarray_reduce +import xarray as xr import echopype as ep from echopype.consolidate import add_location, add_depth from echopype.commongrid.utils import ( @@ -447,36 +448,89 @@ def test_compute_NASC_values(request, er_type): ds_NASC.NASC.values, expected_nasc.values, atol=1e-10, rtol=1e-10, equal_nan=True ) - @pytest.mark.integration @pytest.mark.parametrize( - ("operation","skipna"), + ("operation","skipna", "range_var"), [ - ("MVBS", True), - ("NASC", False), + ("MVBS", True, "depth"), + ("MVBS", False, "depth"), + ("MVBS", True, "echo_range"), + ("MVBS", False, "echo_range"), + # NASC `range_var` always defaults to `depth` so we set this as `None``. + ("NASC", True, None), + ("NASC", False, None), ], ) -def test_compute_MVBS_NASC_skipna_nan_and_non_nan_values(request, operation, skipna): - # Create subset dataset with 2 channels, 2 ping times, 2 depth values: +def test_compute_MVBS_NASC_skipna_nan_and_non_nan_values( + request, + operation, + skipna, + range_var, + caplog, +): + # Create subset dataset with 2 channels, 2 ping times, and 20 range samples: # Get fixture for irregular Sv ds_Sv = request.getfixturevalue("mock_Sv_dataset_irregular") - # Already has 2 channels, so subset for only ping time and range sample - subset_ds_Sv = ds_Sv.isel(ping_time=slice(0,2), range_sample=slice(0,2)) + # Already has 2 channels and 20 range samples, so subset for only ping time + subset_ds_Sv = ds_Sv.isel(ping_time=slice(0,2)) # Compute MVBS / Compute NASC if operation == "MVBS": - da = ep.commongrid.compute_MVBS(subset_ds_Sv, skipna=skipna)["Sv"] + if range_var == "echo_range": + # Turn on logger verbosity + ep.utils.log.verbose(override=False) + + da = ep.commongrid.compute_MVBS( + subset_ds_Sv, + range_var=range_var, + range_bin="2m", + skipna=skipna + )["Sv"] + + if range_var == "echo_range": + # Check for appropriate warning + aggregation_msg = ( + "Aggregation may be negatively impacted since Flox will not aggregate any " + "```Sv``` values that have corresponding NaN coordinate values. Consider handling " + "these values before calling your intended commongrid function." + ) + expected_warning = f"The ```echo_range``` coordinate array contain NaNs. {aggregation_msg}" + assert any(expected_warning in record.message for record in caplog.records) + + # Turn off logger verbosity + ep.utils.log.verbose(override=True) else: - da = ep.commongrid.compute_NASC(subset_ds_Sv, skipna=skipna)["NASC"] - - # Check that da is 2 values: 1 value for each channel - assert da.shape == (2, 1, 1) - - # Check for appropriate NaN/non-NaN values - if skipna: - assert not np.isnan(da.isel(channel=0).squeeze().data) - assert not np.isnan(da.isel(channel=1).squeeze().data) + da = ep.commongrid.compute_NASC(subset_ds_Sv, range_bin="2m", skipna=skipna)["NASC"] + + # Create NaN Mask: True if NaN, False if Not + da_nan_mask = np.isnan(da.data) + + # Check for appropriate NaN/non-NaN values: + + if range_var == "echo_range": + # Note that ALL `Sv` values that are `NaN` have corresponding `echo_range` + # values that are also ALL `NaN``. Flox does not bin any values that have `NaN` + # coordinates, and so none of the values that are aggregated into the 5 bins + # have any `NaNs` that are aggregated into them. + expected_values = [ + [[False, False, False, False, False]], + [[False, False, False, False, False]] + ] + assert np.array_equal(da_nan_mask, np.array(expected_values)) else: - assert np.isnan(da.isel(channel=0).squeeze().data) - assert not np.isnan(da.isel(channel=1).squeeze().data) \ No newline at end of file + # Note that the first value along the depth dimension is always NaN due to the minimum + # depth value in the Sv dataset being 2.5 (2.5m from the added offset), since both MVBS and + # NASC outputs start at depth=0 and so the first depth bin (0m-2m) doesn't contain anything. + if skipna: + expected_values = [ + [[True, False, False, False, False, False]], + [[True, False, False, False, False, False]] + ] + assert np.array_equal(da_nan_mask, np.array(expected_values)) + else: + expected_values = [ + [[True, True, True, False, False, True]], + [[True, False, False, True, True, True]] + ] + assert np.array_equal(da_nan_mask, np.array(expected_values)) From 5ef0a52156668d43cbf43451f98fe6939822ced5 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 23 Apr 2024 21:26:20 +0000 Subject: [PATCH 14/15] revert space --- echopype/commongrid/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index 8939c071d..f80c5a257 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -61,6 +61,7 @@ def compute_raw_MVBS( # Set initial variables ds = xr.Dataset() x_var = "ping_time" + sv_mean = _groupby_x_along_channels( ds_Sv, range_interval, From 8e95b999c8d08982a1a88db06598eb9a4771c284 Mon Sep 17 00:00:00 2001 From: Wu-Jung Lee Date: Tue, 23 Apr 2024 14:53:19 -0700 Subject: [PATCH 15/15] fix flake8 error --- echopype/echodata/echodata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/echopype/echodata/echodata.py b/echopype/echodata/echodata.py index 1f04a8abc..4c0663759 100644 --- a/echopype/echodata/echodata.py +++ b/echopype/echodata/echodata.py @@ -378,7 +378,7 @@ def update_platform( } ) time_dims_max = max([int(dim[-1]) for dim in platform.dims if dim.startswith("time")]) - new_time_dims = [f"time{time_dims_max+i+1}" for i in range(len(ext_time_dims))] + new_time_dims = [f"time{time_dims_max + i + 1}" for i in range(len(ext_time_dims))] # Map each new time dim name to the external time dim name: new_time_dims_mappings = {new: ext for new, ext in zip(new_time_dims, ext_time_dims)}