From a4cca797f0908b7f4dd5925cc9b718d5bea842ef Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Wed, 22 Nov 2023 05:04:46 +0000 Subject: [PATCH 01/11] allow depth dimension into coordinates for apply mask and incorporate this change into tests --- echopype/mask/api.py | 20 ++++++++++++++------ echopype/tests/mask/test_mask.py | 21 +++++++++++---------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 8440603d2..33436134d 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -105,10 +105,17 @@ def _validate_and_collect_mask_input( # the coordinate sequence matters, so fix the tuple form allowed_dims = [ ("ping_time", "range_sample"), + ("ping_time", "depth"), ("channel", "ping_time", "range_sample"), + ("channel", "ping_time", "depth"), ] if mask[mask_ind].dims not in allowed_dims: - raise ValueError("All masks must have dimensions ('ping_time', 'range_sample')!") + raise ValueError( + "Masks must have one of the following dimensions: " + "('ping_time', 'range_sample'), ('ping_time', 'depth'), " + "('channel', 'ping_time', 'range_sample'), " + "('channel', 'ping_time', 'depth')" + ) else: if not isinstance(storage_options_mask, dict): @@ -248,7 +255,7 @@ def apply_mask( source_ds: Union[xr.Dataset, str, pathlib.Path], mask: Union[xr.DataArray, str, pathlib.Path, List[Union[xr.DataArray, str, pathlib.Path]]], var_name: str = "Sv", - fill_value: Union[int, float, np.ndarray, xr.DataArray] = np.nan, + fill_value: Union[int, float, xr.DataArray] = np.nan, storage_options_ds: dict = {}, storage_options_mask: Union[dict, List[dict]] = {}, ) -> xr.Dataset: @@ -263,16 +270,17 @@ def apply_mask( mask: xr.DataArray, str, pathlib.Path, or a list of these datatypes The mask(s) to be applied. Can be a single input or list that corresponds to a DataArray or a path. - Each entry in the list must have dimensions ``('ping_time', 'range_sample')``. - Multi-channel masks are not currently supported. + Each entry in the list must have dimensions ``('ping_time', 'range_sample')`` or + dimensions ``('ping_time', 'depth')``. If a path is provided this should point to a zarr or netcdf file with only one data variable in it. If the input ``mask`` is a list, a logical AND will be used to produce the final mask that will be applied to ``var_name``. var_name: str, default="Sv" The Sv variable name in ``source_ds`` that the mask should be applied to. - This variable needs to have coordinates ``ping_time`` and ``range_sample``, - and can optionally also have coordinate ``channel``. + This variable needs to have coordinates ``('ping_time', 'range_sample')`` or + coordinates ``('ping_time', 'depth')``, and can optionally also have coordinate + ``channel``. In the case of a multi-channel Sv data variable, the ``mask`` will be broadcast to all channels. fill_value: int, float, np.ndarray, or xr.DataArray, default=np.nan diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index 7030f16db..7e692974f 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -119,6 +119,7 @@ def get_mock_source_ds_apply_mask(n: int, n_chan: int, is_delayed: bool) -> xr.D ------- xr.Dataset A Dataset containing data variables ``var1, var2`` with coordinates + ``('channel', 'ping_time', 'depth')`` and ``('channel', 'ping_time', 'range_sample')``. The variables are square matrices of ones for each ``channel``. """ @@ -135,7 +136,7 @@ def get_mock_source_ds_apply_mask(n: int, n_chan: int, is_delayed: bool) -> xr.D # create mock var1 and var2 DataArrays mock_var1_da = xr.DataArray(data=np.stack(mock_var_data), coords={"channel": ("channel", chan_vals, {"long_name": "channel name"}), - "ping_time": np.arange(n), "range_sample": np.arange(n)}, + "ping_time": np.arange(n), "depth": np.arange(n)}, attrs={"long_name": "variable 1"}) mock_var2_da = xr.DataArray(data=np.stack(mock_var_data), coords={"channel": ("channel", chan_vals, {"long_name": "channel name"}), @@ -505,19 +506,19 @@ def test_validate_and_collect_mask_input( marks=pytest.mark.xfail(strict=True, reason="This should fail because mock_ds will " "not have var_name=var3 in it.")), - pytest.param(4, 2, "var1", "1.0", + pytest.param(4, 2, "var2", "1.0", marks=pytest.mark.xfail(strict=True, reason="This should fail because fill_value is an incorrect type.")), (4, 2, "var1", 1), (4, 2, "var1", 1.0), (2, 1, "var1", np.identity(2)[None, :]), (2, 1, "var1", xr.DataArray(data=np.array([[[1.0, 0], [0, 1]]]), - coords={"channel": ["chan1"], "ping_time": [0, 1], "range_sample": [0, 1]}) + coords={"channel": ["chan1"], "ping_time": [0, 1], "depth": [0, 1]}) ), - pytest.param(4, 2, "var1", np.identity(2), + pytest.param(4, 2, "var2", np.identity(2), marks=pytest.mark.xfail(strict=True, reason="This should fail because fill_value is not the right shape.")), - pytest.param(4, 2, "var1", + pytest.param(4, 2, "var2", xr.DataArray(data=np.array([[1.0, 0], [0, 1]]), coords={"ping_time": [0, 1], "range_sample": [0, 1]}), marks=pytest.mark.xfail(strict=True, @@ -633,7 +634,7 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, mock_ds = get_mock_source_ds_apply_mask(n, n_chan, is_delayed) # create input mask and obtain temporary directory, if it was created - mask, temp_dir = create_input_mask(mask, mask_file, mock_ds.coords) + mask, temp_dir = create_input_mask(mask, mask_file, mock_ds[var_name].coords) # create DataArray form of the known truth value var_masked_truth = xr.DataArray(data=np.stack([var_masked_truth for i in range(n_chan)]), @@ -685,13 +686,13 @@ def test_apply_mask_channel_variation(source_has_ch, mask_has_ch): if mask_has_ch: mask = xr.DataArray( np.array([np.identity(2)]), - coords={"channel": ["chA"], "ping_time": np.arange(2), "range_sample": np.arange(2)}, + coords={"channel": ["chA"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, ) else: mask = xr.DataArray( np.identity(2), - coords={"ping_time": np.arange(2), "range_sample": np.arange(2)}, + coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, ) @@ -706,13 +707,13 @@ def test_apply_mask_channel_variation(source_has_ch, mask_has_ch): if source_has_ch: truth_da = xr.DataArray( np.array([[[1, np.nan], [np.nan, 1]]] * 3), - coords={"channel": ["chan1", "chan2", "chan3"], "ping_time": np.arange(2), "range_sample": np.arange(2)}, + coords={"channel": ["chan1", "chan2", "chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs=source_ds[var_name].attrs ) else: truth_da = xr.DataArray( [[1, np.nan], [np.nan, 1]], - coords={"ping_time": np.arange(2), "range_sample": np.arange(2)}, + coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs=source_ds[var_name].attrs ) From d8afc9bcb88058d3f65e4b273d73df9969ed5621 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Wed, 22 Nov 2023 19:03:07 +0000 Subject: [PATCH 02/11] add channel dimension consistency check --- echopype/mask/api.py | 9 +++++++ echopype/tests/mask/test_mask.py | 43 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 33436134d..45a6e6ad3 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -117,6 +117,15 @@ def _validate_and_collect_mask_input( "('channel', 'ping_time', 'depth')" ) + # Check for the channel dimension consistency + channel_dim_shapes = set() + for mask_indiv in mask: + if "channel" in mask_indiv.dims: + for mask_chan_ind in range(len(mask_indiv["channel"])): + channel_dim_shapes.add(mask_indiv.isel(channel=mask_chan_ind).shape) + if len(channel_dim_shapes) > 1: + raise ValueError("All masks must have the same shape in the 'channel' dimension.") + else: if not isinstance(storage_options_mask, dict): raise ValueError( diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index 7e692974f..b0bccbf3d 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -496,6 +496,49 @@ def test_validate_and_collect_mask_input( assert mask_out.identical(mask_da) +@pytest.mark.parametrize( + ("mask_list"), + [ + pytest.param( + [xr.DataArray([np.identity(4)], dims=['channel', 'ping_time', 'depth'], + coords={'channel': ['channel_0']})] + ), + pytest.param( + [xr.DataArray([np.identity(4), np.identity(4)], dims=['channel', 'ping_time', 'depth'], + coords={'channel': ['channel_0', 'channel_1']})] + ), + pytest.param( + [xr.DataArray([np.identity(4), np.identity(4)], dims=['channel', 'ping_time', 'depth'], + coords={'channel': ['channel_0', 'channel_1']}), + xr.DataArray([np.identity(4), np.identity(4)], dims=['channel', 'ping_time', 'depth'], + coords={'channel': ['channel_0', 'channel_1']})] + ), + pytest.param( + [xr.DataArray([np.identity(3), np.identity(3)], dims=['channel', 'ping_time', 'depth'], + coords={'channel': ['channel_0', 'channel_1']}), + xr.DataArray([np.identity(4), np.identity(4)], dims=['channel', 'ping_time', 'depth'], + coords={'channel': ['channel_0', 'channel_1']})], + marks=pytest.mark.xfail( + strict=True, + reason="This should fail because the channel dims are not uniform." + )) + ], + ids=["single_channel_mask", "double_channel", "double_channel_double_masks", + "inconsistent_channels_across_two_masks"] +) +def test_multi_mask_validate_and_collect_mask(mask_list: List[xr.DataArray]): + """ + Tests the allowable types and dimensions for multimask input. + + Parameters + ---------- + mask_list: List[xr.DataArray] + Multimask input to be tested in validate and collect mask input. + """ + + _validate_and_collect_mask_input(mask=mask_list, storage_options_mask={}) + + @pytest.mark.parametrize( ("n", "n_chan", "var_name", "fill_value"), [ From 944b0d27e4c0daebd2546234f30a941d1c76be56 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Wed, 22 Nov 2023 19:20:02 +0000 Subject: [PATCH 03/11] modification so that fill value cannot be of type np ndarray --- echopype/mask/api.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 45a6e6ad3..13842662f 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -144,7 +144,7 @@ def _validate_and_collect_mask_input( def _check_var_name_fill_value( - source_ds: xr.Dataset, var_name: str, fill_value: Union[int, float, np.ndarray, xr.DataArray] + source_ds: xr.Dataset, var_name: str, fill_value: Union[int, float, xr.DataArray] ) -> Union[int, float, np.ndarray, xr.DataArray]: """ Ensures that the inputs ``var_name`` and ``fill_value`` for the function @@ -156,12 +156,12 @@ def _check_var_name_fill_value( A Dataset that contains the variable ``var_name`` var_name: str The variable name in ``source_ds`` that the mask should be applied to - fill_value: int or float or np.ndarray or xr.DataArray + fill_value: int, float, or xr.DataArray Specifies the value(s) at false indices Returns ------- - fill_value: int or float or np.ndarray or xr.DataArray + fill_value: int, float, or xr.DataArray fill_value with sanitized dimensions Raises @@ -183,17 +183,12 @@ def _check_var_name_fill_value( raise ValueError("The Dataset source_ds does not contain the variable var_name!") # check the type of fill_value - if not isinstance(fill_value, (int, float, np.ndarray, xr.DataArray)): - raise TypeError( - "The input fill_value must be of type int or " "float or np.ndarray or xr.DataArray!" - ) + if not isinstance(fill_value, (int, float, xr.DataArray)): + raise TypeError("The input fill_value must be of type int, float, or xr.DataArray!") # make sure that fill_values is the same shape as var_name - if isinstance(fill_value, (np.ndarray, xr.DataArray)): - if isinstance(fill_value, xr.DataArray): - fill_value = fill_value.data.squeeze() # squeeze out length=1 channel dimension - elif isinstance(fill_value, np.ndarray): - fill_value = fill_value.squeeze() # squeeze out length=1 channel dimension + if isinstance(fill_value, xr.DataArray): + fill_value = fill_value.data.squeeze() # squeeze out length=1 channel dimension source_ds_shape = ( source_ds[var_name].isel(channel=0).shape @@ -292,10 +287,10 @@ def apply_mask( ``channel``. In the case of a multi-channel Sv data variable, the ``mask`` will be broadcast to all channels. - fill_value: int, float, np.ndarray, or xr.DataArray, default=np.nan + fill_value: int, float, or xr.DataArray, default=np.nan Value(s) at masked indices. - If ``fill_value`` is of type ``np.ndarray`` or ``xr.DataArray``, - it must have the same shape as each entry of ``mask``. + If ``fill_value`` is of type ``xr.DataArray`` it must have the same shape as each + entry of ``mask``. storage_options_ds: dict, default={} Any additional parameters for the storage backend, corresponding to the path provided for ``source_ds`` From d1420128d63af10ebbe7da2a74e1a73599c4b858 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Wed, 22 Nov 2023 19:31:29 +0000 Subject: [PATCH 04/11] fix fill value incorrect tests --- echopype/tests/mask/test_mask.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index b0bccbf3d..b2ffd1a29 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -554,13 +554,12 @@ def test_multi_mask_validate_and_collect_mask(mask_list: List[xr.DataArray]): reason="This should fail because fill_value is an incorrect type.")), (4, 2, "var1", 1), (4, 2, "var1", 1.0), - (2, 1, "var1", np.identity(2)[None, :]), + pytest.param(2, 1, "var1", np.identity(2)[None, :], + marks=pytest.mark.xfail(strict=True, + reason="This should fail because fill_value is an incorrect type.")), (2, 1, "var1", xr.DataArray(data=np.array([[[1.0, 0], [0, 1]]]), coords={"channel": ["chan1"], "ping_time": [0, 1], "depth": [0, 1]}) - ), - pytest.param(4, 2, "var2", np.identity(2), - marks=pytest.mark.xfail(strict=True, - reason="This should fail because fill_value is not the right shape.")), + ), pytest.param(4, 2, "var2", xr.DataArray(data=np.array([[1.0, 0], [0, 1]]), coords={"ping_time": [0, 1], "range_sample": [0, 1]}), @@ -569,7 +568,7 @@ def test_multi_mask_validate_and_collect_mask(mask_list: List[xr.DataArray]): ], ids=["wrong_var_name_type", "no_var_name_ds", "wrong_fill_value_type", "fill_value_int", "fill_value_float", "fill_value_np_array", "fill_value_DataArray", - "fill_value_np_array_wrong_shape", "fill_value_DataArray_wrong_shape"] + "fill_value_DataArray_wrong_shape"] ) def test_check_var_name_fill_value(n: int, n_chan: int, var_name: str, fill_value: Union[int, float, np.ndarray, xr.DataArray]): @@ -605,8 +604,11 @@ def test_check_var_name_fill_value(n: int, n_chan: int, var_name: str, # single_mask_float_fill (2, 1, "var1", np.identity(2), None, 2.0, False, np.array([[1, 2.0], [2.0, 1]]), False), # single_mask_np_array_fill - (2, 1, "var1", np.identity(2), None, np.array([[[np.nan, np.nan], [np.nan, np.nan]]]), - False, np.array([[1, np.nan], [np.nan, 1]]), False), + pytest.param( + 2, 1, "var1", np.identity(2), None, np.array([[[np.nan, np.nan], [np.nan, np.nan]]]), + False, np.array([[1, np.nan], [np.nan, 1]]), False, + marks=pytest.mark.xfail(strict=True, + reason="This should fail because fill_value is an incorrect type.")), # single_mask_DataArray_fill (2, 1, "var1", np.identity(2), None, xr.DataArray(data=np.array([[[np.nan, np.nan], [np.nan, np.nan]]]), coords={"channel": ["chan1"], From 5548d8d53c153e960accea5b6b779717584b62a8 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Thu, 23 Nov 2023 00:57:03 +0000 Subject: [PATCH 05/11] initial logic for broadcast then np logical and reduce; commented out test that currently needs to be refactored --- echopype/mask/api.py | 31 ++++++++++--------- echopype/tests/mask/test_mask.py | 53 +++++++++++++++++++------------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 13842662f..98a0b7fb5 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -317,26 +317,30 @@ def apply_mask( # Obtain final mask to be applied to var_name if isinstance(mask, list): - # perform a logical AND element-wise operation across the masks - final_mask = np.logical_and.reduce(mask) + # Broadcast all input masks together before combining them (element-wise multiplication) + broadcasted_masks = xr.broadcast(*mask) + + # Perform a logical AND element-wise operation across the masks + final_mask = np.logical_and.reduce(broadcasted_masks) # xr.where has issues with attrs when final_mask is an array, so we make it a DataArray - final_mask = xr.DataArray(final_mask, coords=mask[0].coords) + final_mask = xr.DataArray(final_mask, coords=broadcasted_masks[0].coords) else: final_mask = mask - # Sanity check: final_mask should be of the same shape as source_ds[var_name] - # along the ping_time and range_sample dimensions - def get_ch_shape(da): - return da.isel(channel=0).shape if "channel" in da.dims else da.shape - - # Below operate on the actual data array to be masked + # Operate on the actual data array to be masked source_da = source_ds[var_name] - source_da_shape = get_ch_shape(source_da) - final_mask_shape = get_ch_shape(final_mask) + # Sanity check: final_mask should be of the same shape as source_ds[var_name] + # along the ping_time and range_sample dimensions. + source_da_chan_shape = ( + source_da.isel(channel=0).shape if "channel" in source_da.dims else source_da.shape + ) + final_mask_chan_shape = ( + final_mask.isel(channel=0).shape if "channel" in final_mask.dims else final_mask.shape + ) - if final_mask_shape != source_da_shape: + if final_mask_chan_shape != source_da_chan_shape: raise ValueError( f"The final constructed mask is not of the same shape as source_ds[{var_name}] " "along the ping_time and range_sample dimensions!" @@ -368,12 +372,11 @@ def get_ch_shape(da): _variable_prov_attrs(output_ds[var_name], mask) ) + # Attribute handling process_type = "mask" prov_dict = echopype_prov_attrs(process_type=process_type) prov_dict[f"{process_type}_function"] = "mask.apply_mask" - output_ds = output_ds.assign_attrs(prov_dict) - output_ds = insert_input_processing_level(output_ds, input_ds=source_ds) return output_ds diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index b2ffd1a29..3fafd03c5 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -707,14 +707,37 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, # remove the temporary directory, if it was created temp_dir.cleanup() - +""" @pytest.mark.parametrize( - ("source_has_ch", "mask_has_ch"), + ("source_has_ch", "mask"), [ - (True, True), - (False, True), - (True, False), - (False, False), + (True, [ + xr.DataArray( + np.array([[np.identity(2)], np.identity(2)]), + coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + xr.DataArray( + np.array([np.identity(2)]), + coords={"channel": ["chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + ]), + (False, xr.DataArray( + np.array([np.identity(2), np.identity(2)]), + coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + )), + (True, xr.DataArray( + np.identity(2), + coords={"ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_no_channel"}, + )), + (False, xr.DataArray( + np.identity(2), + coords={"ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_no_channel"}, + )), ], ids=[ "source_with_ch_mask_with_ch", @@ -723,24 +746,11 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "source_no_ch_mask_no_ch", ] ) -def test_apply_mask_channel_variation(source_has_ch, mask_has_ch): +def test_apply_mask_channel_variation(source_has_ch, mask): source_ds = get_mock_source_ds_apply_mask(2, 3, False) var_name = "var1" - if mask_has_ch: - mask = xr.DataArray( - np.array([np.identity(2)]), - coords={"channel": ["chA"], "ping_time": np.arange(2), "depth": np.arange(2)}, - attrs={"long_name": "mask_with_channel"}, - ) - else: - mask = xr.DataArray( - np.identity(2), - coords={"ping_time": np.arange(2), "depth": np.arange(2)}, - attrs={"long_name": "mask_no_channel"}, - ) - if source_has_ch: masked_ds = echopype.mask.apply_mask(source_ds, mask, var_name) else: @@ -751,7 +761,7 @@ def test_apply_mask_channel_variation(source_has_ch, mask_has_ch): # Output dimension will be the same as source if source_has_ch: truth_da = xr.DataArray( - np.array([[[1, np.nan], [np.nan, 1]]] * 3), + np.array([[[1, 1], [1, 1]]] * 3), coords={"channel": ["chan1", "chan2", "chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs=source_ds[var_name].attrs ) @@ -763,3 +773,4 @@ def test_apply_mask_channel_variation(source_has_ch, mask_has_ch): ) assert masked_ds[var_name].equals(truth_da) +""" \ No newline at end of file From 3198f815ee8c48cdb1a4972387e0ee3a50568a6d Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Fri, 15 Mar 2024 19:11:12 +0000 Subject: [PATCH 06/11] modify apply mask logic --- echopype/mask/api.py | 48 ++++++++++++++------- echopype/tests/mask/test_mask.py | 74 ++++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 98a0b7fb5..b442f22e9 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -273,9 +273,10 @@ def apply_mask( Points to a Dataset that contains the variable the mask should be applied to mask: xr.DataArray, str, pathlib.Path, or a list of these datatypes The mask(s) to be applied. - Can be a single input or list that corresponds to a DataArray or a path. - Each entry in the list must have dimensions ``('ping_time', 'range_sample')`` or - dimensions ``('ping_time', 'depth')``. + Can be a individual input or list that corresponds to a DataArray or a path. + Each individual input or entry in the list must contain dimensions + ``('ping_time', 'range_sample')`` or dimensions ``('ping_time', 'depth')``. + The mask can also contain the dimension ``channel``. If a path is provided this should point to a zarr or netcdf file with only one data variable in it. If the input ``mask`` is a list, a logical AND will be used to produce the final @@ -339,26 +340,43 @@ def apply_mask( final_mask_chan_shape = ( final_mask.isel(channel=0).shape if "channel" in final_mask.dims else final_mask.shape ) - if final_mask_chan_shape != source_da_chan_shape: raise ValueError( f"The final constructed mask is not of the same shape as source_ds[{var_name}] " "along the ping_time and range_sample dimensions!" ) - # final_mask is always an xr.DataArray with at most length=1 channel dimension - if "channel" in final_mask.dims: - final_mask = final_mask.isel(channel=0) + # Apply the mask to var_name + if "channel" in final_mask.dims and "channel" in source_da.dims: + # Identify common channels + common_channels = set(final_mask.coords["channel"].values).intersection( + set(source_da.coords["channel"].values) + ) - # Make sure fill_value and final_mask are expanded in dimensions - if "channel" in source_da.dims: - if isinstance(fill_value, np.ndarray): - fill_value = np.array([fill_value] * source_da["channel"].size) - final_mask = np.array([final_mask.data] * source_da["channel"].size) + # Convert common channels back to a sorted list to maintain order + common_channels = sorted(list(common_channels)) - # Apply the mask to var_name - # Somehow keep_attrs=True errors out here, so will attach later - var_name_masked = xr.where(final_mask, x=source_da, y=fill_value) + # Select common channels for operation + final_mask_common = final_mask.sel(channel=common_channels) + source_da_common = source_da.sel(channel=common_channels) + + # Perform operation on common channels + masked_common = xr.where(final_mask_common, x=source_da_common, y=fill_value) + + # Identify remaining channels + all_channels = set(source_da.coords["channel"].values) + remaining_channels = sorted(list(all_channels - set(common_channels))) + + # Select remaining channels + source_da_remaining = source_da.sel(channel=remaining_channels) + + # Combine modified common channels with unmodified remaining channels + var_name_masked = xr.concat([masked_common, source_da_remaining], dim="channel") + elif "channel" in final_mask.dims and "channel" not in source_da.dims: + # Mask using first channel if final mask has channel dim and source da does not + var_name_masked = xr.where(final_mask.isel(channel=0), x=source_da, y=fill_value) + else: + var_name_masked = xr.where(final_mask, x=source_da, y=fill_value) # Obtain a shallow copy of source_ds output_ds = source_ds.copy(deep=False) diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index 3fafd03c5..19604dd12 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -707,46 +707,90 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, # remove the temporary directory, if it was created temp_dir.cleanup() -""" + @pytest.mark.parametrize( - ("source_has_ch", "mask"), - [ + ("source_has_ch", "mask", "truth_da"), + [ + # source_with_ch_mask_list_with_ch (True, [ xr.DataArray( - np.array([[np.identity(2)], np.identity(2)]), + np.array([np.identity(2), np.identity(2)]), coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, ), xr.DataArray( - np.array([np.identity(2)]), + np.array([np.zeros_like(np.identity(2))]), coords={"channel": ["chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, ), - ]), + ], + xr.DataArray( + np.array([[[1, np.nan], [np.nan, 1]], + [[1, np.nan], [np.nan, 1]], + [[np.nan, np.nan], [np.nan, np.nan]]]), + coords={"channel": ["chan1", "chan2", "chan3"], + "ping_time": np.arange(2), "depth": np.arange(2)}, + )), + + # source_with_ch_mask_with_ch + (True, + xr.DataArray( + np.array([np.identity(2), np.identity(2)]), + coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + xr.DataArray( + np.array([[[1, np.nan], [np.nan, 1]], + [[1, np.nan], [np.nan, 1]], + [[1, 1], [1, 1]]]), + coords={"channel": ["chan1", "chan2", "chan3"], + "ping_time": np.arange(2), "depth": np.arange(2)}, + )), + + # source_no_ch_mask_with_ch (False, xr.DataArray( np.array([np.identity(2), np.identity(2)]), coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, + ), + xr.DataArray( + np.array([[1, np.nan], [np.nan, 1]]), + coords={"ping_time": np.arange(2), "depth": np.arange(2)}, )), + + # source_with_ch_mask_no_ch (True, xr.DataArray( np.identity(2), coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, + ), + xr.DataArray( + np.array([[[1, 1, 1], [np.nan, np.nan, np.nan]], + [[np.nan, np.nan, np.nan], [1, 1, 1]]]), + coords={"ping_time": np.arange(2), "depth": np.arange(2), + "channel": ["chan1", "chan2", "chan3"]} )), + + # source_no_ch_mask_no_ch (False, xr.DataArray( np.identity(2), coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, + ), + xr.DataArray( + np.array([[1, np.nan], [np.nan, 1]]), + coords={"ping_time": np.arange(2), "depth": np.arange(2)} )), ], ids=[ + "source_with_ch_mask_list_with_ch", "source_with_ch_mask_with_ch", "source_no_ch_mask_with_ch", "source_with_ch_mask_no_ch", "source_no_ch_mask_no_ch", ] ) -def test_apply_mask_channel_variation(source_has_ch, mask): +def test_apply_mask_channel_variation(source_has_ch, mask, truth_da): source_ds = get_mock_source_ds_apply_mask(2, 3, False) var_name = "var1" @@ -757,20 +801,4 @@ def test_apply_mask_channel_variation(source_has_ch, mask): source_ds[f"{var_name}_ch0"] = source_ds[var_name].isel(channel=0).squeeze() var_name = f"{var_name}_ch0" masked_ds = echopype.mask.apply_mask(source_ds, mask, var_name) - - # Output dimension will be the same as source - if source_has_ch: - truth_da = xr.DataArray( - np.array([[[1, 1], [1, 1]]] * 3), - coords={"channel": ["chan1", "chan2", "chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, - attrs=source_ds[var_name].attrs - ) - else: - truth_da = xr.DataArray( - [[1, np.nan], [np.nan, 1]], - coords={"ping_time": np.arange(2), "depth": np.arange(2)}, - attrs=source_ds[var_name].attrs - ) - assert masked_ds[var_name].equals(truth_da) -""" \ No newline at end of file From e52e05f33a82d1700d24f5c58d072fe15d319136 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Fri, 15 Mar 2024 22:18:18 +0000 Subject: [PATCH 07/11] add keep_unmasked_channel logic and tests --- echopype/mask/api.py | 18 ++++-- echopype/tests/mask/test_mask.py | 107 +++++++++++++++++++++++++------ 2 files changed, 101 insertions(+), 24 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index b442f22e9..624814d0a 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -260,6 +260,7 @@ def apply_mask( mask: Union[xr.DataArray, str, pathlib.Path, List[Union[xr.DataArray, str, pathlib.Path]]], var_name: str = "Sv", fill_value: Union[int, float, xr.DataArray] = np.nan, + keep_unmasked_channel: bool = True, storage_options_ds: dict = {}, storage_options_mask: Union[dict, List[dict]] = {}, ) -> xr.Dataset: @@ -292,6 +293,9 @@ def apply_mask( Value(s) at masked indices. If ``fill_value`` is of type ``xr.DataArray`` it must have the same shape as each entry of ``mask``. + keep_unmasked_channel: bool, default=True + When True: Channels that are not in mask will be left as is. + When False: Channels that are not in mask will be masked. storage_options_ds: dict, default={} Any additional parameters for the storage backend, corresponding to the path provided for ``source_ds`` @@ -318,7 +322,7 @@ def apply_mask( # Obtain final mask to be applied to var_name if isinstance(mask, list): - # Broadcast all input masks together before combining them (element-wise multiplication) + # Broadcast all input masks together before combining them broadcasted_masks = xr.broadcast(*mask) # Perform a logical AND element-wise operation across the masks @@ -370,12 +374,16 @@ def apply_mask( # Select remaining channels source_da_remaining = source_da.sel(channel=remaining_channels) - # Combine modified common channels with unmodified remaining channels + if not keep_unmasked_channel: + # Replace unmasked channel values with fill value + source_da_remaining = xr.full_like(source_da_remaining, fill_value=fill_value) + + # Combine modified common channels with remaining channels var_name_masked = xr.concat([masked_common, source_da_remaining], dim="channel") - elif "channel" in final_mask.dims and "channel" not in source_da.dims: - # Mask using first channel if final mask has channel dim and source da does not - var_name_masked = xr.where(final_mask.isel(channel=0), x=source_da, y=fill_value) else: + if "channel" in final_mask.dims and "channel" not in source_da.dims: + # Select first channel if final mask has channel dim and source da does not + final_mask = final_mask.isel(channel=0) var_name_masked = xr.where(final_mask, x=source_da, y=fill_value) # Obtain a shallow copy of source_ds diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index 19604dd12..056b1c72a 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -709,10 +709,10 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, @pytest.mark.parametrize( - ("source_has_ch", "mask", "truth_da"), + ("source_has_ch", "keep_unmasked_channel", "mask", "truth_da"), [ - # source_with_ch_mask_list_with_ch - (True, [ + # source_with_ch_mask_list_with_ch_keep_unmasked_channel_true + (True, True, [ xr.DataArray( np.array([np.identity(2), np.identity(2)]), coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, @@ -732,8 +732,47 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "ping_time": np.arange(2), "depth": np.arange(2)}, )), - # source_with_ch_mask_with_ch - (True, + # source_with_ch_mask_list_with_ch_keep_unmasked_channel_false + (True, False, [ + xr.DataArray( + np.array([np.identity(2)]), + coords={"channel": ["chan1"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + xr.DataArray( + np.array([np.identity(2)]), + coords={"channel": ["chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + ], + xr.DataArray( + np.array([[[1, np.nan], [np.nan, 1]], + [[np.nan, np.nan], [np.nan, np.nan]], + [[1, np.nan], [np.nan, 1]]]), + coords={"channel": ["chan1", "chan2", "chan3"], + "ping_time": np.arange(2), "depth": np.arange(2)}, + )), + + # source_no_ch_mask_list_with_ch_keep_unmasked_channel_true + (False, True, [ + xr.DataArray( + np.array([np.identity(2), np.identity(2)]), + coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + xr.DataArray( + np.array([np.zeros_like(np.identity(2))]), + coords={"channel": ["chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + ], + xr.DataArray( + np.array([[1, np.nan], [np.nan, 1]]), + coords={"ping_time": np.arange(2), "depth": np.arange(2)}, + )), + + # source_with_ch_mask_with_ch_keep_unmasked_channel_true + (True, True, xr.DataArray( np.array([np.identity(2), np.identity(2)]), coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, @@ -747,8 +786,23 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "ping_time": np.arange(2), "depth": np.arange(2)}, )), - # source_no_ch_mask_with_ch - (False, xr.DataArray( + # source_with_ch_mask_with_ch_keep_unmasked_channel_false + (True, False, + xr.DataArray( + np.array([np.identity(2), np.identity(2)]), + coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + xr.DataArray( + np.array([[[1, np.nan], [np.nan, 1]], + [[1, np.nan], [np.nan, 1]], + [[np.nan, np.nan], [np.nan, np.nan]]]), + coords={"channel": ["chan1", "chan2", "chan3"], + "ping_time": np.arange(2), "depth": np.arange(2)}, + )), + + # source_no_ch_mask_with_ch_keep_unmasked_channel_true + (False, True, xr.DataArray( np.array([np.identity(2), np.identity(2)]), coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, @@ -758,8 +812,8 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, coords={"ping_time": np.arange(2), "depth": np.arange(2)}, )), - # source_with_ch_mask_no_ch - (True, xr.DataArray( + # source_with_ch_mask_no_ch_keep_unmasked_channel_true + (True, True, xr.DataArray( np.identity(2), coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, @@ -771,8 +825,8 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "channel": ["chan1", "chan2", "chan3"]} )), - # source_no_ch_mask_no_ch - (False, xr.DataArray( + # source_no_ch_mask_no_ch_keep_unmasked_channel_true + (False, True, xr.DataArray( np.identity(2), coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, @@ -783,22 +837,37 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, )), ], ids=[ - "source_with_ch_mask_list_with_ch", - "source_with_ch_mask_with_ch", - "source_no_ch_mask_with_ch", - "source_with_ch_mask_no_ch", - "source_no_ch_mask_no_ch", + "source_with_ch_mask_list_with_ch_keep_unmasked_channel_true", + "source_with_ch_mask_list_with_ch_keep_unmasked_channel_false", + "source_no_ch_mask_list_with_ch_keep_unmasked_channel_true", + "source_with_ch_mask_with_ch_keep_unmasked_channel_true", + "source_with_ch_mask_with_ch_keep_unmasked_channel_false", + "source_no_ch_mask_with_ch_keep_unmasked_channel_true", + "source_with_ch_mask_no_ch_keep_unmasked_channel_true", + "source_no_ch_mask_no_ch_keep_unmasked_channel_true", ] ) -def test_apply_mask_channel_variation(source_has_ch, mask, truth_da): +def test_apply_mask_channel_variation(source_has_ch, keep_unmasked_channel, mask, truth_da): + # Create source dataset source_ds = get_mock_source_ds_apply_mask(2, 3, False) var_name = "var1" + # Apply mask if source_has_ch: - masked_ds = echopype.mask.apply_mask(source_ds, mask, var_name) + masked_ds = echopype.mask.apply_mask(source_ds, + mask, + var_name, + keep_unmasked_channel=keep_unmasked_channel + ) else: source_ds[f"{var_name}_ch0"] = source_ds[var_name].isel(channel=0).squeeze() var_name = f"{var_name}_ch0" - masked_ds = echopype.mask.apply_mask(source_ds, mask, var_name) + masked_ds = echopype.mask.apply_mask(source_ds, + mask, + var_name, + keep_unmasked_channel=keep_unmasked_channel + ) + + # Check mask to match truth assert masked_ds[var_name].equals(truth_da) From f675f413b16d59cdf0d408d925d34c185b68478d Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Mon, 1 Apr 2024 22:17:00 +0000 Subject: [PATCH 08/11] make more strict as to input dimensions --- echopype/mask/api.py | 57 ++++------- echopype/tests/mask/test_mask.py | 160 ++++++++++++++----------------- 2 files changed, 87 insertions(+), 130 deletions(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 624814d0a..d8f6d6ece 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -260,7 +260,6 @@ def apply_mask( mask: Union[xr.DataArray, str, pathlib.Path, List[Union[xr.DataArray, str, pathlib.Path]]], var_name: str = "Sv", fill_value: Union[int, float, xr.DataArray] = np.nan, - keep_unmasked_channel: bool = True, storage_options_ds: dict = {}, storage_options_mask: Union[dict, List[dict]] = {}, ) -> xr.Dataset: @@ -293,9 +292,6 @@ def apply_mask( Value(s) at masked indices. If ``fill_value`` is of type ``xr.DataArray`` it must have the same shape as each entry of ``mask``. - keep_unmasked_channel: bool, default=True - When True: Channels that are not in mask will be left as is. - When False: Channels that are not in mask will be masked. storage_options_ds: dict, default={} Any additional parameters for the storage backend, corresponding to the path provided for ``source_ds`` @@ -336,7 +332,7 @@ def apply_mask( # Operate on the actual data array to be masked source_da = source_ds[var_name] - # Sanity check: final_mask should be of the same shape as source_ds[var_name] + # The final_mask should be of the same shape as source_ds[var_name] # along the ping_time and range_sample dimensions. source_da_chan_shape = ( source_da.isel(channel=0).shape if "channel" in source_da.dims else source_da.shape @@ -347,44 +343,25 @@ def apply_mask( if final_mask_chan_shape != source_da_chan_shape: raise ValueError( f"The final constructed mask is not of the same shape as source_ds[{var_name}] " - "along the ping_time and range_sample dimensions!" + "along the ping_time, and range_sample dimensions!" ) - - # Apply the mask to var_name - if "channel" in final_mask.dims and "channel" in source_da.dims: - # Identify common channels - common_channels = set(final_mask.coords["channel"].values).intersection( - set(source_da.coords["channel"].values) + # If final_mask has dim channel then source_da must have dim channel + if "channel" in final_mask.dims and "channel" not in source_da.dims: + raise ValueError( + "The final constructed mask has dim channel, " + f"so source_ds[{var_name}] must have dim channel." ) + # If final_mask and source_da both have channel dimension, then they must + # have the same number of channels. + elif "channel" in final_mask.dims and "channel" in source_da.dims: + if len(final_mask["channel"]) != len(source_da["channel"]): + raise ValueError( + f"If both the final constructed mask and source_ds[{var_name}] " + "have channel then they must have matching channel dimensions." + ) - # Convert common channels back to a sorted list to maintain order - common_channels = sorted(list(common_channels)) - - # Select common channels for operation - final_mask_common = final_mask.sel(channel=common_channels) - source_da_common = source_da.sel(channel=common_channels) - - # Perform operation on common channels - masked_common = xr.where(final_mask_common, x=source_da_common, y=fill_value) - - # Identify remaining channels - all_channels = set(source_da.coords["channel"].values) - remaining_channels = sorted(list(all_channels - set(common_channels))) - - # Select remaining channels - source_da_remaining = source_da.sel(channel=remaining_channels) - - if not keep_unmasked_channel: - # Replace unmasked channel values with fill value - source_da_remaining = xr.full_like(source_da_remaining, fill_value=fill_value) - - # Combine modified common channels with remaining channels - var_name_masked = xr.concat([masked_common, source_da_remaining], dim="channel") - else: - if "channel" in final_mask.dims and "channel" not in source_da.dims: - # Select first channel if final mask has channel dim and source da does not - final_mask = final_mask.isel(channel=0) - var_name_masked = xr.where(final_mask, x=source_da, y=fill_value) + # Apply the mask to var_name + var_name_masked = xr.where(final_mask, x=source_da, y=fill_value) # Obtain a shallow copy of source_ds output_ds = source_ds.copy(deep=False) diff --git a/echopype/tests/mask/test_mask.py b/echopype/tests/mask/test_mask.py index 056b1c72a..6eba0d995 100644 --- a/echopype/tests/mask/test_mask.py +++ b/echopype/tests/mask/test_mask.py @@ -708,11 +708,12 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, temp_dir.cleanup() +@pytest.mark.integration @pytest.mark.parametrize( - ("source_has_ch", "keep_unmasked_channel", "mask", "truth_da"), + ("source_has_ch", "mask", "truth_da"), [ - # source_with_ch_mask_list_with_ch_keep_unmasked_channel_true - (True, True, [ + # source_with_ch_mask_list_with_ch + (True, [ xr.DataArray( np.array([np.identity(2), np.identity(2)]), coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, @@ -732,50 +733,26 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "ping_time": np.arange(2), "depth": np.arange(2)}, )), - # source_with_ch_mask_list_with_ch_keep_unmasked_channel_false - (True, False, [ + # source_with_ch_mask_list_with_ch_fail_different_channel_lengths + (True, [ xr.DataArray( np.array([np.identity(2)]), coords={"channel": ["chan1"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, ), - xr.DataArray( - np.array([np.identity(2)]), - coords={"channel": ["chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, - attrs={"long_name": "mask_with_channel"}, - ), - ], - xr.DataArray( - np.array([[[1, np.nan], [np.nan, 1]], - [[np.nan, np.nan], [np.nan, np.nan]], - [[1, np.nan], [np.nan, 1]]]), - coords={"channel": ["chan1", "chan2", "chan3"], - "ping_time": np.arange(2), "depth": np.arange(2)}, - )), - - # source_no_ch_mask_list_with_ch_keep_unmasked_channel_true - (False, True, [ - xr.DataArray( - np.array([np.identity(2), np.identity(2)]), - coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, - attrs={"long_name": "mask_with_channel"}, - ), xr.DataArray( np.array([np.zeros_like(np.identity(2))]), coords={"channel": ["chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, ), ], - xr.DataArray( - np.array([[1, np.nan], [np.nan, 1]]), - coords={"ping_time": np.arange(2), "depth": np.arange(2)}, - )), + None), - # source_with_ch_mask_with_ch_keep_unmasked_channel_true - (True, True, + # source_with_ch_mask_with_ch + (True, xr.DataArray( - np.array([np.identity(2), np.identity(2)]), - coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, + np.array([np.identity(2), np.identity(2), np.ones_like(np.identity(2))]), + coords={"channel": ["chan1", "chan2", "chan3"], "ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_with_channel"}, ), xr.DataArray( @@ -786,34 +763,8 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "ping_time": np.arange(2), "depth": np.arange(2)}, )), - # source_with_ch_mask_with_ch_keep_unmasked_channel_false - (True, False, - xr.DataArray( - np.array([np.identity(2), np.identity(2)]), - coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, - attrs={"long_name": "mask_with_channel"}, - ), - xr.DataArray( - np.array([[[1, np.nan], [np.nan, 1]], - [[1, np.nan], [np.nan, 1]], - [[np.nan, np.nan], [np.nan, np.nan]]]), - coords={"channel": ["chan1", "chan2", "chan3"], - "ping_time": np.arange(2), "depth": np.arange(2)}, - )), - - # source_no_ch_mask_with_ch_keep_unmasked_channel_true - (False, True, xr.DataArray( - np.array([np.identity(2), np.identity(2)]), - coords={"channel": ["chan1", "chan2"], "ping_time": np.arange(2), "depth": np.arange(2)}, - attrs={"long_name": "mask_with_channel"}, - ), - xr.DataArray( - np.array([[1, np.nan], [np.nan, 1]]), - coords={"ping_time": np.arange(2), "depth": np.arange(2)}, - )), - - # source_with_ch_mask_no_ch_keep_unmasked_channel_true - (True, True, xr.DataArray( + # source_with_ch_mask_no_ch + (True, xr.DataArray( np.identity(2), coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, @@ -825,8 +776,16 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, "channel": ["chan1", "chan2", "chan3"]} )), - # source_no_ch_mask_no_ch_keep_unmasked_channel_true - (False, True, xr.DataArray( + # source_no_ch_mask_with_ch_fail + (False, xr.DataArray( + np.array([np.identity(2)]), + coords={"channel": ["chan1"], "ping_time": np.arange(2), "depth": np.arange(2)}, + attrs={"long_name": "mask_with_channel"}, + ), + None), + + # source_no_ch_mask_no_ch + (False, xr.DataArray( np.identity(2), coords={"ping_time": np.arange(2), "depth": np.arange(2)}, attrs={"long_name": "mask_no_channel"}, @@ -835,39 +794,60 @@ def test_apply_mask(n: int, n_chan: int, var_name: str, np.array([[1, np.nan], [np.nan, 1]]), coords={"ping_time": np.arange(2), "depth": np.arange(2)} )), + + # source_no_ch_mask_no_ch_fail_different_ping_time_depth_shape + (False, xr.DataArray( + np.zeros((3, 1)), + coords={"ping_time": np.arange(3), "depth": np.arange(1)}, + attrs={"long_name": "mask_no_channel"}, + ), + None), ], ids=[ - "source_with_ch_mask_list_with_ch_keep_unmasked_channel_true", - "source_with_ch_mask_list_with_ch_keep_unmasked_channel_false", - "source_no_ch_mask_list_with_ch_keep_unmasked_channel_true", - "source_with_ch_mask_with_ch_keep_unmasked_channel_true", - "source_with_ch_mask_with_ch_keep_unmasked_channel_false", - "source_no_ch_mask_with_ch_keep_unmasked_channel_true", - "source_with_ch_mask_no_ch_keep_unmasked_channel_true", - "source_no_ch_mask_no_ch_keep_unmasked_channel_true", + "source_with_ch_mask_list_with_ch", + "source_with_ch_mask_list_with_ch_fail_different_channel_lengths", + "source_with_ch_mask_with_ch", + "source_with_ch_mask_no_ch", + "source_no_ch_mask_with_ch_fail", + "source_no_ch_mask_no_ch", + "source_no_ch_mask_no_ch_fail_different_ping_time_depth_shape", ] ) -def test_apply_mask_channel_variation(source_has_ch, keep_unmasked_channel, mask, truth_da): +def test_apply_mask_channel_variation(source_has_ch, mask, truth_da): # Create source dataset source_ds = get_mock_source_ds_apply_mask(2, 3, False) var_name = "var1" - # Apply mask - if source_has_ch: - masked_ds = echopype.mask.apply_mask(source_ds, - mask, - var_name, - keep_unmasked_channel=keep_unmasked_channel - ) + if truth_da is None: + # Attempt to apply mask w/ 'bad' shapes and check for raised ValueError + with pytest.raises(ValueError): + if source_has_ch: + masked_ds = echopype.mask.apply_mask(source_ds, + mask, + var_name + ) + else: + source_ds[f"{var_name}_ch0"] = source_ds[var_name].isel(channel=0).squeeze() + var_name = f"{var_name}_ch0" + masked_ds = echopype.mask.apply_mask(source_ds, + mask, + var_name + ) else: - source_ds[f"{var_name}_ch0"] = source_ds[var_name].isel(channel=0).squeeze() - var_name = f"{var_name}_ch0" - masked_ds = echopype.mask.apply_mask(source_ds, - mask, - var_name, - keep_unmasked_channel=keep_unmasked_channel - ) - - # Check mask to match truth - assert masked_ds[var_name].equals(truth_da) + # Apply mask and check matching truth_da + if source_has_ch: + masked_ds = echopype.mask.apply_mask(source_ds, + mask, + var_name + ) + else: + source_ds[f"{var_name}_ch0"] = source_ds[var_name].isel(channel=0).squeeze() + var_name = f"{var_name}_ch0" + masked_ds = echopype.mask.apply_mask(source_ds, + mask, + var_name + ) + + # Check mask to match truth + assert masked_ds[var_name].equals(truth_da) From 37828087af60e9697aa307dde461c18792ce5620 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Mon, 1 Apr 2024 22:20:00 +0000 Subject: [PATCH 09/11] use 'a list' instead of 'list' --- echopype/mask/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index d8f6d6ece..88a8a8a7b 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -273,7 +273,7 @@ def apply_mask( Points to a Dataset that contains the variable the mask should be applied to mask: xr.DataArray, str, pathlib.Path, or a list of these datatypes The mask(s) to be applied. - Can be a individual input or list that corresponds to a DataArray or a path. + Can be a individual input or a list that corresponds to a DataArray or a path. Each individual input or entry in the list must contain dimensions ``('ping_time', 'range_sample')`` or dimensions ``('ping_time', 'depth')``. The mask can also contain the dimension ``channel``. From 04ad774b70529855156d30e8531e3f2d105d884d Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Mon, 1 Apr 2024 22:34:04 +0000 Subject: [PATCH 10/11] add 3 cases and user mask subset --- echopype/mask/api.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/echopype/mask/api.py b/echopype/mask/api.py index 88a8a8a7b..217dc9920 100644 --- a/echopype/mask/api.py +++ b/echopype/mask/api.py @@ -267,6 +267,17 @@ def apply_mask( Applies the provided mask(s) to the Sv variable ``var_name`` in the provided Dataset ``source_ds``. + The code allows for these 3 cases of `source_ds` and `mask` dimensions: + + 1) No channel in both source ds and mask, but they have matching ping time and depth + dimensions. + 2) Source ds and mask both have matching channel, ping time, and depth dimensions. + 3) Source ds has channel dimension and mask doesn't, but they have matching ping + time and depth dimensions. + + If a user only wants to apply masks to a subset of the channels in source ds, + they could put 1s/0s for all data in the channels that are not masked. + Parameters ---------- source_ds: xr.Dataset, str, or pathlib.Path From e40deef1079a02fa489876927052e90f156f17ad Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Thu, 4 Apr 2024 21:08:03 +0000 Subject: [PATCH 11/11] add location nan zero warnings --- add_location_nan_zero_warnings.ipynb | 117 +++++++++++++++++++++++++++ echopype/consolidate/api.py | 18 +++++ 2 files changed, 135 insertions(+) create mode 100644 add_location_nan_zero_warnings.ipynb diff --git a/add_location_nan_zero_warnings.ipynb b/add_location_nan_zero_warnings.ipynb new file mode 100644 index 000000000..39e751edf --- /dev/null +++ b/add_location_nan_zero_warnings.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Pip install necessary libraries\n", + "#%pip install echopype s3fs boto3==1.34.51 numpy==1.24.4 xarray==2022.12.0" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary libraries\n", + "import s3fs\n", + "import boto3\n", + "import echopype as ep\n", + "from botocore import UNSIGNED\n", + "from botocore.config import Config" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Grab from S3 and parse EK60 \n", + "s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))\n", + "s3_file_system = s3fs.S3FileSystem(anon=True)\n", + "\n", + "bucket_name = 'noaa-wcsd-pds'\n", + "ship_name = 'Albatross_Iv'\n", + "cruise_name = 'AL0403'\n", + "sensor_name = 'EK60'\n", + "file_name = \"L0010-D20040416-T094042-EK60.raw\"\n", + "\n", + "raw_file_s3_path = f\"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}\"\n", + "echodata = ep.open_raw(raw_file_s3_path, sonar_model=sensor_name, use_swap=True, storage_options={'anon': True})" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Latitude Values Pre Interp (lowest 5): [ 0. 0. 43.68361 43.68362333 43.68362333]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-04 21:06:21,791:echopype.consolidate.api:WARNING: The echodata[\"Platform\"][\"latitude\"] array contains zeros. Interpolation may be negatively impacted, so the user should handle these values.\n", + "2024-04-04 21:06:21,792:echopype.consolidate.api:WARNING: The echodata[\"Platform\"][\"longitude\"] array contains zeros. Interpolation may be negatively impacted, so the user should handle these values.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Latitude Values Post Interp (lowest 5): [19.57221436 23.97964969 43.68361412 43.68363238 43.68365699]\n" + ] + } + ], + "source": [ + "# Turn on Echopype Verbosity\n", + "ep.utils.log.verbose(override=False)\n", + "\n", + "# Print out pre interpolated latitudes\n", + "latitude = echodata.platform.latitude.values\n", + "latitude_values = latitude.copy()\n", + "latitude_values.sort()\n", + "print(\"Latitude Values Pre Interp (lowest 5):\", latitude_values[:5])\n", + "\n", + "ds_sv = ep.calibrate.compute_Sv(echodata)\n", + "ds_sv_location = ep.consolidate.add_location(ds_sv, echodata)\n", + "\n", + "# Print out post interpolated latitudes\n", + "latitude_2 = ds_sv_location.latitude.values\n", + "latitude_values_2 = latitude_2.copy()\n", + "latitude_values_2.sort()\n", + "print(\"Latitude Values Post Interp (lowest 5):\", latitude_values_2[:5])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "echopype", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/echopype/consolidate/api.py b/echopype/consolidate/api.py index 364c76be3..caff887f1 100644 --- a/echopype/consolidate/api.py +++ b/echopype/consolidate/api.py @@ -9,11 +9,14 @@ from ..echodata import EchoData from ..echodata.simrad import retrieve_correct_beam_group from ..utils.io import validate_source_ds_da +from ..utils.log import _init_logger from ..utils.prov import add_processing_level from .split_beam_angle import add_angle_to_ds, get_angle_complex_samples, get_angle_power_samples POSITION_VARIABLES = ["latitude", "longitude"] +logger = _init_logger(__name__) + def swap_dims_channel_frequency(ds: xr.Dataset) -> xr.Dataset: """ @@ -177,6 +180,21 @@ def sel_interp(var, time_dim_name): if "longitude" not in echodata["Platform"] or echodata["Platform"]["longitude"].isnull().all(): raise ValueError("Coordinate variables not present or all nan") + # Check if any latitude/longitude value is NaN/0 + contains_nan_lat = np.isnan(echodata["Platform"]["latitude"].values).any() + contains_nan_lon = np.isnan(echodata["Platform"]["longitude"].values).any() + contains_zero_lat = (echodata["Platform"]["latitude"].values == 0).any() + contains_zero_lon = (echodata["Platform"]["longitude"].values == 0).any() + interp_msg = "Interpolation may be negatively impacted, so the user should handle these values." + if contains_nan_lat: + logger.warning(f'The echodata["Platform"]["latitude"] array contains NaNs. {interp_msg}') + if contains_nan_lon: + logger.warning(f'The echodata["Platform"]["longitude"] array contains NaNs. {interp_msg}') + if contains_zero_lat: + logger.warning(f'The echodata["Platform"]["latitude"] array contains zeros. {interp_msg}') + if contains_zero_lon: + logger.warning(f'The echodata["Platform"]["longitude"] array contains zeros. {interp_msg}') + interp_ds = ds.copy() time_dim_name = list(echodata["Platform"]["longitude"].dims)[0] interp_ds["latitude"] = sel_interp("latitude", time_dim_name)