Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Background Noise Removal Failure #1332

Closed
ctuguinay opened this issue Jun 13, 2024 · 2 comments · Fixed by #1369
Closed

Remove Background Noise Removal Failure #1332

ctuguinay opened this issue Jun 13, 2024 · 2 comments · Fixed by #1369
Labels
bug Something isn't working

Comments

@ctuguinay
Copy link
Collaborator

Found in PR #1331, background noise removal fails at during test_raw_to_mvbs in the test file echopype/tests/utils/test_processinglevels_integration.py. Not quite sure what the problem is but I'll get to it later since it is out of the scope of #1331.

______________________________________ test_raw_to_mvbs[AZFP-AZFP-raw_and_xml_paths1-extras1] ______________________________________

sonar_model = 'AZFP', path_model = 'AZFP', raw_and_xml_paths = ('17082117.01A', '17041823.XML')
extras = {'latitude': 45.0, 'longitude': -60.0, 'pressure': 59, 'salinity': 27.9}
test_path = {'AD2CP': PosixPath('/home/exouser/echopype/echopype/test_data/ad2cp'), 'AZFP': PosixPath('/home/exouser/echopype/echo...me/exouser/echopype/echopype/test_data/ea640'), 'ECS': PosixPath('/home/exouser/echopype/echopype/test_data/ecs'), ...}

    @pytest.mark.test
    @pytest.mark.parametrize(
        ["sonar_model", "path_model", "raw_and_xml_paths", "extras"],
        [
            pytest.param(
                "EK60",
                "EK60",
                ("Winter2017-D20170115-T150122.raw", None),
                {},
                # marks=pytest.mark.skipif(sys.platform == "win32", reason="Test data not available on windows tests"),
            ),
            pytest.param(
                "AZFP",
                "AZFP",
                ("17082117.01A", "17041823.XML"),
                {"longitude": -60.0, "latitude": 45.0, "salinity": 27.9, "pressure": 59},
                # marks=pytest.mark.skipif(sys.platform == "win32", reason="Test data not available on windows tests"),
            ),
        ],
    )
    def test_raw_to_mvbs(
            sonar_model,
            path_model,
            raw_and_xml_paths,
            extras,
            test_path
    ):
        # Prepare the Sv dataset
        raw_path = test_path[path_model] / raw_and_xml_paths[0]
        if raw_and_xml_paths[1]:
            xml_path = test_path[path_model] / raw_and_xml_paths[1]
        else:
            xml_path = None
    
        def _presence_test(test_ds, processing_level):
            assert "processing_level" in test_ds.attrs
            assert "processing_level_url" in test_ds.attrs
            assert test_ds.attrs["processing_level"] == processing_level
    
        def _absence_test(test_ds):
            assert "processing_level" not in test_ds.attrs
            assert "processing_level_url" not in test_ds.attrs
    
        # ---- Convert raw file and update_platform
        def _var_presence_notnan_test(name):
            if name in ed['Platform'].data_vars and not ed["Platform"][name].isnull().all():
                return True
            else:
                return False
    
        ed = ep.open_raw(raw_path, xml_path=xml_path, sonar_model=sonar_model)
        if _var_presence_notnan_test("longitude") and _var_presence_notnan_test("latitude"):
            _presence_test(ed["Top-level"], "Level 1A")
        elif "longitude" in extras and "latitude" in extras:
            _absence_test(ed["Top-level"])
            point_ds = xr.Dataset(
                {
                    "latitude": (["time"], np.array([float(extras["latitude"])])),
                    "longitude": (["time"], np.array([float(extras["longitude"])])),
                },
                coords={
                    "time": (["time"], np.array([ed["Sonar/Beam_group1"]["ping_time"].values.min()]))
                },
            )
            ed.update_platform(point_ds, variable_mappings={"latitude": "latitude", "longitude": "longitude"})
            _presence_test(ed["Top-level"], "Level 1A")
        else:
            _absence_test(ed["Top-level"])
            raise RuntimeError(
                "Platform latitude and longitude are not present and cannot be added "
                "using update_platform based on test raw file and included parameters."
            )
    
        # ---- Calibrate and add_latlon
        env_params = None
        if sonar_model == "AZFP":
            # AZFP data require external salinity and pressure
            env_params = {
                "temperature": ed["Environment"]["temperature"].values.mean(),
                "salinity": extras["salinity"],
                "pressure": extras["pressure"],
            }
    
        ds = ep.calibrate.compute_Sv(echodata=ed, env_params=env_params)
        _absence_test(ds)
    
        Sv_ds = ep.consolidate.add_location(ds=ds, echodata=ed)
        assert "longitude" in Sv_ds.data_vars and "latitude" in Sv_ds.data_vars
        _presence_test(Sv_ds, "Level 2A")
    
        # ---- Noise removal
>       denoised_ds = ep.clean.remove_background_noise(Sv_ds, ping_num=10, range_sample_num=20)

echopype/tests/utils/test_processinglevels_integration.py:103: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
echopype/utils/prov.py:237: in inner
    dataobj = func(*args, **kwargs)
echopype/clean/api.py:358: in remove_background_noise
    Sv_noise = estimate_background_noise(ds_Sv, ping_num, range_sample_num, noise_max=noise_max)
echopype/clean/api.py:310: in estimate_background_noise
    noise.reindex({"ping_time": power_cal["ping_time"]}, method="ffill")
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/util/deprecation_helpers.py:115: in inner
    return func(*args, **kwargs)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/dataarray.py:2176: in reindex
    return alignment.reindex(
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:999: in reindex
    aligner.align()
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:582: in align
    self.reindex_all()
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:557: in reindex_all
    self.results = tuple(
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:558: in <genexpr>
    self._reindex_one(obj, matching_indexes)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:544: in _reindex_one
    dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:510: in _get_dim_pos_indexers
    indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/indexes.py:823: in reindex_like
    return {self.dim: get_indexer_nd(self.index, other.index, method, tolerance)}
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/indexes.py:561: in get_indexer_nd
    flat_indexer = index.get_indexer(flat_labels, method=method, tolerance=tolerance)
../miniforge3/envs/echopype/lib/python3.9/site-packages/pandas/core/indexes/base.py:3893: in get_indexer
    return self._get_indexer_non_comparable(target, method=method, unique=True)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = Index([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200, 210, 220, 230],
      dtype='int64', name='ping_time')
target = DatetimeIndex(['2017-08-21 17:05:37', '2017-08-21 17:05:40',
               '2017-08-21 17:05:43', '2017-08-21 17:05:4...            '2017-08-21 17:53:31', '2017-08-21 17:53:34'],
              dtype='datetime64[ns]', length=240, freq=None)
method = 'pad', unique = True

    @final
    def _get_indexer_non_comparable(
        self, target: Index, method, unique: bool = True
    ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        Called from get_indexer or get_indexer_non_unique when the target
        is of a non-comparable dtype.
    
        For get_indexer lookups with method=None, get_indexer is an _equality_
        check, so non-comparable dtypes mean we will always have no matches.
    
        For get_indexer lookups with a method, get_indexer is an _inequality_
        check, so non-comparable dtypes mean we will always raise TypeError.
    
        Parameters
        ----------
        target : Index
        method : str or None
        unique : bool, default True
            * True if called from get_indexer.
            * False if called from get_indexer_non_unique.
    
        Raises
        ------
        TypeError
            If doing an inequality check, i.e. method is not None.
        """
        if method is not None:
            other_dtype = _unpack_nested_dtype(target)
>           raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")
E           TypeError: Cannot compare dtypes int64 and datetime64[ns]

../miniforge3/envs/echopype/lib/python3.9/site-packages/pandas/core/indexes/base.py:6301: TypeError
@ctuguinay
Copy link
Collaborator Author

ctuguinay commented Jun 28, 2024

Ok so the problem happens here:

spreading_loss = 20 * np.log10(ds_Sv["echo_range"].where(ds_Sv["echo_range"] >= 1, other=1))
absorption_loss = 2 * ds_Sv["sound_absorption"] * ds_Sv["echo_range"]

# Compute power binned averages
power_cal = _log2lin(ds_Sv["Sv"] - spreading_loss - absorption_loss)
power_cal_binned_avg = 10 * np.log10(
    power_cal.coarsen(
        ping_time=ping_num,
        range_sample=range_sample_num,
        boundary="pad",
    ).mean()
)

# Compute noise
noise = power_cal_binned_avg.min(dim="range_sample", skipna=True)

# Align noise `ping_time` to the first index of each coarsened `ping_time` bin
noise = noise.assign_coords(ping_time=ping_num * np.arange(len(noise["ping_time"])))

# Limit max noise level
noise = noise.where(noise < noise_max, noise_max) if noise_max is not None else noise

# Upsample noise to original ping time dimension
Sv_noise = (
    noise.reindex({"ping_time": power_cal["ping_time"]}, method="ffill")
    + spreading_loss
    + absorption_loss
)

When printing out the ping time indices for noise after the reassignment of coordinates I get this:

image

And when printing out the original ping time indices for power_cal I get this:

image

That being the case, the reindex errors out since there's a comparison between integers and datetime objects being doing within that function.

@ctuguinay
Copy link
Collaborator Author

ctuguinay commented Jul 30, 2024

This problem is caused by noise["ping_time"] having integer coords and power_cal["ping_time"], To fix this, we have to assign integer coords to power_cal["ping_time"] (as shown in #1369).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

Successfully merging a pull request may close this issue.

1 participant