From 459385b8c06206cfa2995186dfa648b1574f9e2c Mon Sep 17 00:00:00 2001 From: Raluca Simedroni <92971445+simedroniraluca@users.noreply.github.com> Date: Tue, 31 Oct 2023 16:33:03 +0200 Subject: [PATCH] Merge/add transient fixed conflicts (#109) --- .github/workflows/packit.yaml | 4 +- .github/workflows/pypi.yaml | 4 +- .github/workflows/windows.yaml | 2 +- .pre-commit-config.yaml | 6 +- docs/source/contributing.rst | 4 +- docs/source/data-proc.md | 4 +- echopype/clean/api.py | 25 +- echopype/clean/impulse_noise.py | 243 +------- echopype/clean/signal_attenuation.py | 170 +++--- echopype/clean/transient_noise.py | 222 ++++---- .../sensor_ep_version_mapping/v05x_to_v06x.py | 2 +- echopype/mask/seabed.py | 38 +- echopype/tests/calibrate/test_cal_params.py | 4 +- .../tests/clean/test_clean_impulse_noise.py | 6 +- .../tests/clean/test_clean_transient_noise.py | 5 +- .../tests/clean/test_signal_attenuation.py | 10 +- echopype/tests/commongrid/conftest.py | 520 ++++++++++++++++++ echopype/tests/utils/test_utils_io.py | 167 +++--- .../test_utils_mask_transformation_xr.py | 122 ++++ echopype/utils/mask_transformation_xr.py | 115 ++++ requirements.txt | 2 +- 21 files changed, 1122 insertions(+), 553 deletions(-) create mode 100644 echopype/tests/utils/test_utils_mask_transformation_xr.py create mode 100644 echopype/utils/mask_transformation_xr.py diff --git a/.github/workflows/packit.yaml b/.github/workflows/packit.yaml index 1c184eba8..c855f3748 100644 --- a/.github/workflows/packit.yaml +++ b/.github/workflows/packit.yaml @@ -20,7 +20,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v4.7.0 + uses: actions/setup-python@v4.7.1 with: python-version: 3.9 @@ -52,7 +52,7 @@ jobs: needs: build-artifact runs-on: ubuntu-20.04 steps: - - uses: actions/setup-python@v4.7.0 + - uses: actions/setup-python@v4.7.1 name: Install Python with: python-version: 3.9 diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml index 3f4d38732..8f9a72870 100644 --- a/.github/workflows/pypi.yaml +++ b/.github/workflows/pypi.yaml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v4.7.0 + uses: actions/setup-python@v4.7.1 with: python-version: 3.9 @@ -56,7 +56,7 @@ jobs: needs: build-artifact runs-on: ubuntu-20.04 steps: - - uses: actions/setup-python@v4.7.0 + - uses: actions/setup-python@v4.7.1 name: Install Python with: python-version: 3.9 diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml index 45059b432..1c73e3b38 100644 --- a/.github/workflows/windows.yaml +++ b/.github/workflows/windows.yaml @@ -46,7 +46,7 @@ jobs: # Check data endpoint curl http://localhost:8080/data/ - name: Setup Python - uses: actions/setup-python@v4.7.0 + uses: actions/setup-python@v4.7.1 with: python-version: ${{ matrix.python-version }} architecture: x64 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1a0e8b230..b79dc4057 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ exclude: | ) repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -24,7 +24,7 @@ repos: args: ["--profile", "black", "--filter-files"] - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.10.1 hooks: - id: black @@ -34,7 +34,7 @@ repos: - id: flake8 - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell # Checks spelling in `docs/source` and 
`echopype` dirs ONLY diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index e8f6f8f94..630c0b002 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -37,7 +37,7 @@ This diagram depicts the complete workflow we use in the source GitHub repositor - ``doc patch``: Updates to the documentation that refer to the current ``echopype`` release can be pushed out immediately to the `echopype documentation site `_ - by contibuting patches (PRs) to the ``stable`` branch. See `Documentation development`_ + by contributing patches (PRs) to the ``stable`` branch. See `Documentation development`_ below for more details. - ``code patch``: Code development is carried out as patches (PRs) to the ``dev`` branch; changes in the documentation corresponding to changes in the code can be @@ -231,7 +231,7 @@ To view the HTML files generated by Jupyter Book, open the ``docs/_build/html/index.html`` in your browser. Jupyter Book `configurations `_ can be found in the ``docs/source/_config.yml`` file. -The `table of contents `_ arragements for the sidebar can be found in ``docs/source/_toc.yml`` file. +The `table of contents `_ arrangements for the sidebar can be found in ``docs/source/_toc.yml`` file. When ready to commit your changes, please pull request your changes to the `stable` branch. Once the PR is submitted, the `pre-commit` CI will run for basic spelling and formatting check (See the `pre-commit hooks section `_ for more details). Any changes from the `pre-commit` check have to be pulled to your branch (via `git pull`) before your push further commits. You will also be able to view the newly built doc in the PR via the "docs/readthedocs.org:echopype" entry shown below. diff --git a/docs/source/data-proc.md b/docs/source/data-proc.md index 60678f8e9..75b906496 100644 --- a/docs/source/data-proc.md +++ b/docs/source/data-proc.md @@ -1,6 +1,6 @@ # Data processing -Echopype data processing funcionalities are structured into different subpackages with expandability and a series of [data processing levels](processing-levels) in mind. Once the data is converted from the raw instrument data files to standardized [`EchoData` objects](data-format:echodata-object) (or stored in `.zarr` or `.nc` format) and calibrated, the core input and output of most subsequent functions are generic [xarray `Datasets`](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html). This design allows new processing functions be easily added without needing to understand specialized objects, other than functions needing access of data stored only in the raw-converted `EchoData` objects. +Echopype data processing functionalities are structured into different subpackages with expandability and a series of [data processing levels](processing-levels) in mind. Once the data is converted from the raw instrument data files to standardized [`EchoData` objects](data-format:echodata-object) (or stored in `.zarr` or `.nc` format) and calibrated, the core input and output of most subsequent functions are generic [xarray `Datasets`](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html). This design allows new processing functions be easily added without needing to understand specialized objects, other than functions needing access of data stored only in the raw-converted `EchoData` objects. The section [**Data processing functionalities**](data-proc:functions) provides information for current processing functions and their usage. 
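For readers skimming this patch, the converted-then-calibrated flow described above boils down to two calls. A minimal sketch, assuming a hypothetical EK60 raw file; `ep.open_raw` and `ep.calibrate.compute_Sv` are echopype's public entry points:

import echopype as ep

# convert a raw instrument file into a standardized EchoData object
# ("my_survey.raw" and the EK60 sonar model are illustrative assumptions)
ed = ep.open_raw("my_survey.raw", sonar_model="EK60")

# calibrate: the result is a generic xarray Dataset holding "Sv",
# the common input/output of the processing functions patched below
ds_Sv = ep.calibrate.compute_Sv(ed)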
@@ -9,6 +9,6 @@ The section [**Additional information for processed data**](data-proc:additional
 (data-proc:format)=
 ## Format of processed data

-Once raw data (represented by the `EchoData` objects) are calibrated (via [`compute_Sv`](echopype.calibrate.compute_Sv)), the calibrated data and the outputs of all subsequent [processing functions](data-process:funcionalities) are generic [xarray Datasets](https://docs.xarray.dev/en/stable/user-guide/data-structures.html#dataset).
+Once raw data (represented by the `EchoData` objects) are calibrated (via [`compute_Sv`](echopype.calibrate.compute_Sv)), the calibrated data and the outputs of all subsequent [processing functions](data-process:functionalities) are generic [xarray Datasets](https://docs.xarray.dev/en/stable/user-guide/data-structures.html#dataset).

 We currently do not follow any specific conventions for processed data, but we retain provenance information in the dataset, including the [data processing levels](./processing-levels.md). However, whether and how data variables used in the processing will be stored remain to be determined.
diff --git a/echopype/clean/api.py b/echopype/clean/api.py
index 99fbd25a8..52f41e675 100644
--- a/echopype/clean/api.py
+++ b/echopype/clean/api.py
@@ -92,7 +100,7 @@ def get_transient_noise_mask(
     method: str = "ryan",
 ) -> xr.DataArray:
     """
-    Create a mask based on the identified signal attenuations of Sv values at 38KHz.
+    Create a transient noise mask.
     This method is based on:
     Ryan et al. (2015) ‘Reducing bias due to noise and attenuation in
     open-ocean echo integration data’, ICES Journal of Marine Science,
@@ -166,7 +174,6 @@ def get_impulse_noise_mask(
     Can contain the following:
     thr: Union[Tuple[float, float], int, float]
         User-defined threshold value (dB) (ryan and ryan iterable)
-        or a 2-element tuple with the range of threshold values (wang).
     m: Optional[Union[int, float]] = None,
         Vertical binning length (in number of samples or range)
         (ryan and ryan iterable).
     n: Optional[Union[int, Tuple[int, int]]] = None,
         Number of pings either side for comparisons (ryan),
         or a 2-element tuple specifying the range (ryan iterable).
         Defaults to None.
-    erode: Optional[List[Tuple[int, int]]] = None,
-        Window size for each erosion cycle (wang).
-        Defaults to None.
-    dilate: Optional[List[Tuple[int, int]]] = None,
-        Window size for each dilation cycle (wang).
-        Defaults to None.
-    median: Optional[List[Tuple[int, int]]] = None,
-        Window size for each median filter cycle (wang).
-        Defaults to None.
     method: str, optional
-        The method (ryan, ryan_iterable or wang) used to mask impulse noise.
+        The method (ryan, ryan_iterable) used to mask impulse noise.
         Defaults to 'ryan'.
Returns @@ -200,7 +198,7 @@ def get_impulse_noise_mask( mask_map = { "ryan": impulse_noise._ryan, "ryan_iterable": impulse_noise._ryan_iterable, - "wang": impulse_noise._wang, + # "wang": impulse_noise._wang, } if method not in mask_map.keys(): raise ValueError(f"Unsupported method: {method}") @@ -449,4 +447,3 @@ def get_attenuation_mask_multichannel( mask_list.append(mask) mask = create_multichannel_mask(mask_list, channel_list) return mask - \ No newline at end of file diff --git a/echopype/clean/impulse_noise.py b/echopype/clean/impulse_noise.py index 5c453be21..4dfe5c38a 100644 --- a/echopype/clean/impulse_noise.py +++ b/echopype/clean/impulse_noise.py @@ -12,16 +12,13 @@ __authors__ = [ "Alejandro Ariza", # wrote ryan(), ryan_iterable(), and wang() "Raluca Simedroni", # adapted the impulse noise masking algorithms to echopype + "Ruxandra Valcu", # adapted the ryan algorithm to use xarray data structures ] -import warnings - import numpy as np import xarray as xr -from scipy.ndimage import median_filter -from skimage.morphology import dilation, erosion -from ..utils import mask_transformation +from ..utils.mask_transformation_xr import downsample, upsample RYAN_DEFAULT_PARAMS = {"thr": 10, "m": 5, "n": 1} RYAN_ITERABLE_DEFAULT_PARAMS = {"thr": 10, "m": 5, "n": (1, 2)} @@ -33,178 +30,6 @@ } -def _wang( - Sv_ds: xr.Dataset, - desired_channel: str, - parameters: dict = WANG_DEFAULT_PARAMS, -) -> xr.DataArray: - """ - Clean impulse noise from Sv data following the method described by: - - Wang et al. (2015) ’A noise removal algorithm for acoustic data with - strong interference based on post-processing techniques’, CCAMLR - SG-ASAM: 15/02. - - This algorithm runs different cycles of erosion, dilation, and median - filtering to clean impulse noise from Sv. - Returns a boolean mask indicating the location of impulse noise in Sv data. - - Parameters - ---------- - Sv_ds (xarray.Dataset): xr.DataArray with Sv data for multiple channels (dB). - desired_channel (str): Name of the desired frequency channel. - parameters {}: parameter dict, should contain: - thr : 2-element tuple with bottom/top Sv thresholds (dB). - erode : List of 2-element tuples indicating the window's size - for each erosion cycle. - dilate : List of 2-element tuples indicating the window's size - for each dilation cycle. - median : List of 2-element tuples indicating the window's size - for each median filter cycle. - - Returns - ------- - xarray.DataArray: xr.DataArray with mask indicating the presence of impulse noise. - - Warning - ------- - Input Sv data shouldn't contain NaN values. - These values are not processed correctly by the impulse noise removal algorithm and - will be marked as noise in the output mask. - Please ensure that Sv data is cleaned or appropriately preprocessed - before using this function. - - This method identifies the locations of noise in the Sv data but - does not follow the exact same process as the wang function from echopy, - which replaces the identified noise values with -999. The visual representation in echograms - produced from the output of this method may therefore differ from those generated using - the wang from echopy function. Users should take into - account that regions marked as True in the returned mask have been identified as noise. 
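A minimal usage sketch for the `get_impulse_noise_mask` API documented above. The full positional signature is not shown in this hunk, so the keyword layout, import path, input dataset, and channel name are assumptions:

from echopype.clean.api import get_impulse_noise_mask  # assumed import path per this diff

# ds_Sv: a calibrated Sv dataset (xr.Dataset) with a "channel" dimension
mask = get_impulse_noise_mask(
    ds_Sv,
    parameters={"thr": 10, "m": 5, "n": 1},  # RYAN_DEFAULT_PARAMS from this diff
    desired_channel="my_channel",            # hypothetical channel name
    method="ryan",
)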
- - - """ - parameter_names = ("thr", "erode", "dilate", "median") - if not all(name in parameters.keys() for name in parameter_names): - raise ValueError( - "Missing parameters - should be thr, erode, dilate, median, are" - + str(parameters.keys()) - ) - thr = parameters["thr"] - erode = parameters["erode"] - dilate = parameters["dilate"] - median = parameters["median"] - - # Select the desired frequency channel directly using 'sel' - selected_channel_ds = Sv_ds.sel(channel=desired_channel) - - # Extract Sv values for the desired frequency channel - Sv = selected_channel_ds["Sv"].values - - # Check if there are any NaN values in the Sv data - if np.isnan(Sv).any(): - warnings.warn( - "Input Sv data contains NaN values." - "These values are not processed correctly by the impulse noise removal algorithm" - "and will be marked as noise in the output mask." - "Please ensure that Sv data is cleaned or appropriately " - "preprocessed before using this function." - ) - - # Transpose the Sv data so that the vertical dimension is the first dimension (axis 0) - Sv = np.transpose(Sv) - - """ - Call the wang function to get the cleaned Sv data and the mask indicating edges, - where swarms analysis couldn't be performed - The variable mask_ is a boolean mask where - True represents edges where cleaning wasn't applied, - and False represents areas where cleaning was applied - """ - # Sv_cleaned, mask_ = _wang(Sv, thr, erode, dilate, median) - # set weak noise and strong interference as vacant samples (-999) - Sv_thresholded = Sv.copy() - Sv_thresholded[(Sv < thr[0]) | (Sv > thr[1])] = -999 - - # remaining weak interferences will take neighbouring vacant values - # by running erosion cycles - Sv_eroded = Sv.copy() - for e in erode: - Sv_eroded = erosion(Sv_thresholded, np.ones(e)) - - # the last step might have turned interferences inside biology into vacant - # samples, this is solved by running dilation cycles - Sv_dilated = Sv_eroded.copy() - for d in dilate: - Sv_dilated = dilation(Sv_dilated, np.ones(d)) - - # dilation has modified the Sv value of biological features, so these are - # now corrected to corresponding Sv values before the erosion/dilation - Sv_corrected = Sv_dilated.copy() - mask_bio = (Sv_dilated >= thr[0]) & (Sv_dilated < thr[1]) - Sv_corrected[mask_bio] = Sv_thresholded[mask_bio] - - # compute median convolution in Sv corrected array - Sv_median = Sv_corrected.copy() - for m in median: - Sv_median = mask_transformation.log( - median_filter(mask_transformation.lin(Sv_median), footprint=np.ones(m)) - ) - - # any vacant sample inside biological features will be corrected with - # the median of corresponding neighbouring samples - Sv_cleaned = Sv_corrected.copy() - mask_bio = (Sv >= thr[0]) & (Sv < thr[1]) - mask_vacant = Sv_corrected == -999 - Sv_cleaned[mask_vacant & mask_bio] = Sv_median[mask_vacant & mask_bio] - - # get mask indicating edges, where swarms analysis couldn't be performed - mask_ = np.ones_like(Sv_cleaned, dtype=bool) - idx = int((max([e[0], d[0]]) - 1) / 2) - jdx = int((max([e[1], d[1]]) - 1) / 2) - mask_[idx:-idx, jdx:-jdx] = False - - # return Sv_corrected2, mask_ - - """ - Create a boolean mask comparing the original and cleaned Sv data - Creates a boolean mask where True denotes locations where the original Sv values - are different from the cleaned Sv values. - """ - - noise_mask = Sv != Sv_cleaned - - # Combined mask - # The bitwise negation ~ operator is applied to mask_. 
- # So, ~mask_ is True where cleaning was applied and - # False where cleaning wasn't applied (the edges). - combined_mask = np.logical_and(~mask_, noise_mask) - - # Transpose the mask back to its original shape - # Combined_mask is a mask that marks valid (non-edge) locations where - # noise has been identified and cleaned. - combined_mask = np.transpose(noise_mask) - - # Create a new xarray for the mask with the correct dimensions and coordinates - mask_xr = xr.DataArray( - combined_mask, - dims=("ping_time", "range_sample"), - coords={ - "ping_time": selected_channel_ds.ping_time.values, - "range_sample": selected_channel_ds.range_sample.values, - }, - ) - - warnings.warn( - "The output mask from this function identifies regions of noise in the Sv data, " - "but does not modify them in the same way as the `wang` function from echopy." - "Visualizations using this mask may therefore differ from" - "those generated using the `wang` function from echopy. " - "Be aware that regions marked as True in the mask are identified as noise." - ) - - return mask_xr - - def _ryan( Sv_ds: xr.Dataset, desired_channel: str, @@ -250,57 +75,26 @@ def _ryan( # Select the desired frequency channel directly using 'sel' selected_channel_ds = Sv_ds.sel(channel=desired_channel) - # Extract Sv and iax for the desired frequency channel - Sv = selected_channel_ds["Sv"].values - - # But first, transpose the Sv data so that the vertical dimension is axis 0 - Sv = np.transpose(Sv) - - iax = selected_channel_ds.range_sample.values + Sv = selected_channel_ds.Sv + Sv_ = downsample(Sv, coordinates={"range_sample": m}, is_log=True) + Sv_ = upsample(Sv_, Sv) - # Call the existing ryan function - # mask, mask_ = _ryan(Sv, iax, m, n, thr) - iax_ = np.arange(iax[0], iax[-1], m) - Sv_ = mask_transformation.oned(Sv, iax, iax_, 0, log_var=True)[0] - - # resample back to full resolution - jax = np.arange(len(Sv[0])) - Sv_, mask_ = mask_transformation.full(Sv_, iax_, jax, iax, jax) - - # side comparison (±n) - dummy = np.zeros((iax.shape[0], n)) * np.nan - comparison_forward = Sv_ - np.c_[Sv_[:, n:], dummy] - comparison_backward = Sv_ - np.c_[dummy, Sv_[:, 0:-n]] + # get valid sample mask + mask = Sv_.isnull() # get IN mask - comparison_forward[np.isnan(comparison_forward)] = np.inf - maskf = comparison_forward > thr - comparison_backward[np.isnan(comparison_backward)] = np.inf - maskb = comparison_backward > thr - mask = maskf & maskb - - # get second mask indicating valid samples in IN mask - mask_[:, 0:n] = False - mask_[:, -n:] = False - - # return mask, mask_ + forward = Sv_ - Sv_.shift(shifts={"ping_time": n}, fill_value=np.nan) + backward = Sv_ - Sv_.shift(shifts={"ping_time": -n}, fill_value=np.nan) + forward = forward.fillna(np.inf) + backward = backward.fillna(np.inf) + mask_in = (forward > thr) & (backward > thr) + # add to the mask areas that have had data shifted out of range + mask_in[0:n, :] = True + mask_in[-n:, :] = True - # Transpose the mask back to its original shape - mask = np.transpose(mask) - mask_ = np.transpose(mask_) - combined_mask = mask & (~mask_) - - # Create a new xarray for the mask with the correct dimensions and coordinates - mask_xr = xr.DataArray( - combined_mask, - dims=("ping_time", "range_sample"), - coords={ - "ping_time": selected_channel_ds.ping_time.values, - "range_sample": selected_channel_ds.range_sample.values, - }, - ) - - return mask_xr + mask = mask | mask_in + mask = mask.drop("channel") + return mask def _ryan_iterable( @@ -358,4 +152,3 @@ def _ryan_iterable( for mask in 
mask_list[1:]: mask_xr |= mask return mask_xr - \ No newline at end of file diff --git a/echopype/clean/signal_attenuation.py b/echopype/clean/signal_attenuation.py index ade941646..d2caea96e 100644 --- a/echopype/clean/signal_attenuation.py +++ b/echopype/clean/signal_attenuation.py @@ -2,9 +2,8 @@ import numpy as np import xarray as xr -from skimage.measure import label -from ..utils.mask_transformation import full as _full, lin as _lin, log as _log, twod as _twod +from ..utils.mask_transformation_xr import lin as _lin, line_to_square, log as _log DEFAULT_RYAN_PARAMS = {"r0": 180, "r1": 280, "n": 30, "thr": -6, "start": 0} DEFAULT_ARIZA_PARAMS = {"offset": 20, "thr": (-40, -35), "m": 20, "n": 50} @@ -55,56 +54,61 @@ def _ryan(source_Sv: xr.DataArray, desired_channel: str, parameters=DEFAULT_RYAN r1 = parameters["r1"] n = parameters["n"] thr = parameters["thr"] - start = parameters["start"] + # start = parameters["start"] - selected_channel_Sv = source_Sv.sel(channel=desired_channel) - Sv = selected_channel_Sv["Sv"].values - r = source_Sv["echo_range"].values[0, 0] + channel_Sv = source_Sv.sel(channel=desired_channel) + Sv = channel_Sv["Sv"] + r = channel_Sv["echo_range"][0] # raise errors if wrong arguments if r0 > r1: raise Exception("Minimum range has to be shorter than maximum range") - # return empty mask if searching range is outside the echosounder range + # return empty mask if searching range is outside the echosounder range if (r0 > r[-1]) or (r1 < r[0]): - warnings.warn("Searching range is outside the echosounder range. Returning empty mask.") - mask = np.zeros_like(Sv, dtype=bool) - mask_ = np.zeros_like(Sv, dtype=bool) - - # turn layer boundaries into arrays with length = Sv.shape[1] - r0 = np.ones(Sv.shape[1]) * r0 - r1 = np.ones(Sv.shape[1]) * r1 - - # start masking process - mask_ = np.zeros(Sv.shape, dtype=bool) - mask = np.zeros(Sv.shape, dtype=bool) - # find indexes for upper and lower SL limits - up = np.argmin(abs(r - r0)) - lw = np.argmin(abs(r - r1)) - for j in range(start, len(Sv)): - # TODO: now indexes are the same at every loop, but future - # versions will have layer boundaries with variable range - # (need to implement mask_layer.py beforehand!) - - # mask where AS evaluation is unfeasible (e.g. edge issues, all-NANs) - if (j - n < 0) | (j + n > len(Sv) - 1) | np.all(np.isnan(Sv[j, up:lw])): - mask_[j, :] = True - - # compare ping and block medians otherwise & mask ping if too different - else: - pingmedian = _log(np.nanmedian(_lin(Sv[j, up:lw]))) - blockmedian = _log(np.nanmedian(_lin(Sv[(j - n) : (j + n), up:lw]))) - - if (pingmedian - blockmedian) < thr: - mask[j, :] = True - - final_mask = np.logical_not(mask[start:, :] | mask_[start:, :]) - return_mask = xr.DataArray( - final_mask, - dims=("ping_time", "range_sample"), - coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample}, + # Raise a warning to inform the user + warnings.warn( + "The searching range is outside the echosounder range. " + "A default mask with all True values is returned, " + "which won't mask any data points in the dataset." 
+    )
+    return xr.DataArray(
+        np.ones_like(Sv, dtype=bool),
+        dims=("ping_time", "range_sample"),
+        coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample},
+    )
+
+    # get upper and lower range indexes
+    up = abs(r - r0).argmin(dim="range_sample").values
+    lw = abs(r - r1).argmin(dim="range_sample").values
+
+    ping_median = _log(_lin(Sv).median(dim="range_sample", skipna=True))
+    # ping_75q = _log(_lin(Sv).reduce(np.nanpercentile, q=75, dim="range_sample"))
+
+    block = Sv[:, up:lw]
+    block_list = [block.shift({"ping_time": i}) for i in range(-n, n)]
+    concat_block = xr.concat(block_list, dim="range_sample")
+    block_median = _log(_lin(concat_block).median(dim="range_sample", skipna=True))
+
+    noise_column = (ping_median - block_median) > thr
+
+    noise_column_mask = xr.DataArray(
+        data=line_to_square(noise_column, Sv, "range_sample").transpose(),
+        dims=Sv.dims,
+        coords=Sv.coords,
     )
-    return return_mask
+    noise_column_mask = ~noise_column_mask
+
+    nan_mask = Sv.isnull()
+    nan_mask = nan_mask.reduce(np.any, dim="range_sample")
+
+    # uncomment these if we want to mask the areas where we couldn't calculate
+    # nan_mask[0:n] = False
+    # nan_mask[-n:] = False
+
+    mask = nan_mask & noise_column_mask
+    mask = mask.drop("channel")
+    return mask


 def _ariza(source_Sv, desired_channel, parameters=DEFAULT_ARIZA_PARAMS):
@@ -117,78 +121,30 @@ def _ariza(source_Sv, desired_channel, parameters=DEFAULT_ARIZA_PARAMS):
         source_Sv (xr.DataArray): Sv array
         selected_channel (str): name of the channel to process
         parameters(dict): dict of parameters, containing:
-            offset (int):
-            m (int):
-            n (int):
             thr (int):
+            seabed (xr.DataArray): externally created seabed mask

     Returns:
         xr.DataArray: boolean array with AS mask, with ping_time and range_sample dims
     """
-    parameter_names = (
-        "thr",
-        "m",
-        "n",
-        "offset",
-    )
+    parameter_names = ("thr", "seabed")
     if not all(name in parameters.keys() for name in parameter_names):
         raise ValueError(
-            "Missing parameters - should be thr, m, n, offset, are" + str(parameters.keys())
+            "Missing parameters - should be thr, seabed, are" + str(parameters.keys())
         )
-    m = parameters["m"]
-    n = parameters["n"]
+    # m = parameters["m"]
+    # n = parameters["n"]
     thr = parameters["thr"]
-    offset = parameters["offset"]
-
-    selected_channel_Sv = source_Sv.sel(channel=desired_channel)
-    Sv = selected_channel_Sv["Sv"].values
-    r = source_Sv["echo_range"].values[0, 0]
-
-    # get ping array
-    p = np.arange(len(Sv))
-    # set to NaN shallow waters and data below the Sv threshold
-    Sv_ = Sv.copy()
-    Sv_[0 : np.nanargmin(abs(r - offset)), :] = np.nan
-    Sv_[Sv_ < thr[0]] = np.nan
-    # bin Sv
-    # TODO: update to 'twod' and 'full' functions
-    # DID
-    irvals = np.round(np.linspace(p[0], p[-1], num=int((p[-1] - p[0]) / n) + 1))
-    jrvals = np.linspace(r[0], r[-1], num=int((r[-1] - r[0]) / m) + 1)
-    Sv_bnd, p_bnd, r_bnd = _twod(Sv_, p, r, irvals, jrvals, operation="mean")[0:3]
-    Sv_bnd = _full(Sv_bnd, p_bnd, r_bnd, p, r)[0]
-    # label binned Sv data features
-    Sv_lbl = label(~np.isnan(Sv_bnd))
-    labels = np.unique(Sv_lbl)
-    labels = np.delete(labels, np.where(labels == 0))
-    # list the median values for each Sv feature
-    val = []
-    for lbl in labels:
-        val.append(_log(np.nanmedian(_lin(Sv_bnd[Sv_lbl == lbl]))))
-
-    # keep the feature with a median above the Sv threshold (~seabed)
-    # and set the rest of the array to NaN
-    if val:
-        if np.nanmax(val) > thr[1]:
-            labels = labels[val != np.nanmax(val)]
-            for lbl in labels:
-                Sv_bnd[Sv_lbl == lbl] = np.nan
-        else:
-            Sv_bnd[:] = np.nan
-    else:
-        Sv_bnd[:] = np.nan
-
-    # remove everything in the original Sv array that is not seabed
-    Sv_sb = Sv.copy()
-    Sv_sb[np.isnan(Sv_bnd)] = np.nan
-
-    # compute the percentile 90th for each ping, at the range at which
-    # the seabed is supposed to be.
-    seabed_percentile = _log(np.nanpercentile(_lin(Sv_sb), 95, axis=0))
-
-    # get mask where this value falls below a Sv threshold (seabed breaches)
-    mask = seabed_percentile < thr[0]
-    mask = np.tile(mask, [len(Sv), 1])
+    # offset = parameters["offset"]
+    seabed = parameters["seabed"]
+
+    channel_Sv = source_Sv.sel(channel=desired_channel)
+    Sv = channel_Sv["Sv"]
+
+    Sv_sb = Sv.copy(deep=True).where(seabed, np.nan)
+    seabed_percentile = _log(_lin(Sv_sb).reduce(dim="range_sample", func=np.nanpercentile, q=95))
+    mask = line_to_square(seabed_percentile < thr[0], Sv, dim="range_sample").transpose()
+    mask = mask.drop("channel")
     return_mask = xr.DataArray(
         mask,
         dims=("ping_time", "range_sample"),
diff --git a/echopype/clean/transient_noise.py b/echopype/clean/transient_noise.py
index 7dcad5dec..bfa1ec935 100644
--- a/echopype/clean/transient_noise.py
+++ b/echopype/clean/transient_noise.py
@@ -12,22 +12,25 @@
 __authors__ = [
     "Alejandro Ariza",  # wrote ryan(), fielding()
     "Mihai Boldeanu",  # adapted the mask transient noise algorithms to echopype
+    "Ruxandra Valcu",  # modified ryan and fielding to run off xarray functionality
 ]

-
 import warnings

 import numpy as np
 import xarray as xr

-from ..utils.mask_transformation import lin as _lin, log as _log
+# from ..utils.mask_transformation import lin as _lin, log as _log
+from ..utils.mask_transformation_xr import lin as _lin, line_to_square, log as _log

 RYAN_DEFAULT_PARAMS = {
+    # "m": 5,
+    # "n": 20,
     "m": 5,
-    "n": 20,
+    "n": 5,
     "thr": 20,
     "excludeabove": 250,
-    "operation": "percentile15",
+    "operation": "mean",
 }
 FIELDING_DEFAULT_PARAMS = {
     "r0": 200,
@@ -54,6 +57,7 @@ def _ryan(source_Sv: xr.DataArray, desired_channel: str, parameters: dict = RYAN
     transient noise. Sv values are removed if exceed a threshold.
     Masking is excluded above 250 m by default to avoid the removal of
     aggregated biota.
+
     Args:
         source_Sv (xr.DataArray): Sv array
         selected_channel (str): name of the channel to process
@@ -83,46 +87,44 @@ def _ryan(source_Sv: xr.DataArray, desired_channel: str, parameters: dict = RYAN
     excludeabove = parameters["excludeabove"]
     operation = parameters["operation"]

-    selected_channel_Sv = source_Sv.sel(channel=desired_channel)
-    Sv = selected_channel_Sv["Sv"].values
-    r = source_Sv["echo_range"].values[0, 0]
-
-    # offsets for i and j indexes
-    ioff = n
-    joff = np.argmin(abs(r - m))
-
-    # preclude processing above a user-defined range
-    r0 = np.argmin(abs(r - excludeabove))
-
-    # mask if Sv sample greater than averaged block
-    # TODO: find out a faster method. The iteration below is too slow.
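Throughout this patch, block statistics are wrapped in `_lin`/`_log` because Sv is stored in dB, while means, medians, and percentiles must be taken in the linear domain. A minimal sketch of the convention, assuming the standard dB definitions used by the `mask_transformation_xr` utilities:

import numpy as np

def lin(db):
    # dB -> linear
    return 10 ** (db / 10)

def log(linear):
    # linear -> dB
    return 10 * np.log10(linear)

# e.g. a centered rolling block mean computed linearly, then converted back:
# block = log(lin(Sv).rolling(ping_time=11, range_sample=7, center=True).mean())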
- mask = np.ones(Sv.shape, dtype=bool) - mask[:, 0:r0] = False - - for i in range(len(Sv)): - for j in range(r0, len(Sv[0])): - # proceed only if enough room for setting the block - if (i - ioff >= 0) & (i + ioff < len(Sv)) & (j - joff >= 0) & (j + joff < len(Sv[0])): - sample = Sv[i, j] - if operation == "mean": - block = _log(np.nanmean(_lin(Sv[i - ioff : i + ioff, j - joff : j + joff]))) - elif operation == "median": - block = _log(np.nanmedian(_lin(Sv[i - ioff : i + ioff, j - joff : j + joff]))) - else: - block = _log( - np.nanpercentile( - _lin(Sv[i - ioff : i + ioff, j - joff : j + joff]), int(operation[-2:]) - ) - ) - mask[i, j] = sample - block > thr - - mask = np.logical_not(mask) - return_mask = xr.DataArray( - mask, - dims=("ping_time", "range_sample"), - coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample}, - ) - return return_mask + channel_Sv = source_Sv.sel(channel=desired_channel) + Sv = channel_Sv["Sv"] + range = channel_Sv["echo_range"][0] + + # calculate offsets + ping_offset = n * 2 + 1 + range_offset = abs(range - m).argmin(dim="range_sample").values + range_offset = int(range_offset) * 2 + 1 + + r0 = abs(range - excludeabove).argmin(dim="range_sample").values + + mask = Sv > 0 # just to create it at the right size + + # create averaged/median value block + block = _lin(Sv) + if operation == "mean": + # block = block.rolling(ping_time=n, range_sample=range_offset).mean() + block = block.rolling(ping_time=ping_offset, range_sample=range_offset, center=True).reduce( + func=np.nanmean + ) + elif operation == "median": + block = block.rolling(ping_time=ping_offset, range_sample=range_offset, center=True).reduce( + func=np.nanmedian + ) + else: # percentile + q = int(operation[-2:]) + block = block.rolling(ping_time=ping_offset, range_sample=range_offset, center=True).reduce( + func=np.nanpercentile, q=q + ) + block = _log(block) + + mask = Sv - block > thr + + mask = mask.where(~(mask["range_sample"] < r0), False) + mask = ~mask + mask = mask.drop("channel") + + return mask def _fielding( @@ -176,77 +178,111 @@ def _fielding( r1 = parameters["r1"] n = parameters["n"] thr = parameters["thr"] - roff = parameters["roff"] + # roff = parameters["roff"] maxts = parameters["maxts"] jumps = parameters["jumps"] - start = parameters["start"] + # start = parameters["start"] - selected_channel_Sv = source_Sv.sel(channel=desired_channel) - Sv = selected_channel_Sv["Sv"].values - r = source_Sv["echo_range"].values[0, 0] + channel_Sv = source_Sv.sel(channel=desired_channel) + Sv = channel_Sv["Sv"] + r = channel_Sv["echo_range"][0] # raise errors if wrong arguments if r0 > r1: raise Exception("Minimum range has to be shorter than maximum range") - # In the case where the searching range is outside the echosounder range: + # return empty mask if searching range is outside the echosounder range if (r0 > r[-1]) or (r1 < r[0]): - mask = np.zeros_like(Sv, dtype=bool) - mask_ = np.zeros_like(Sv, dtype=bool) - # Raise a warning to inform the user warnings.warn( "The searching range is outside the echosounder range. " - "A default mask with all False values is returned, " + "A default mask with all True values is returned, " "which won't mask any data points in the dataset." 
) - - combined_mask = mask return xr.DataArray( - combined_mask, + np.ones_like(Sv, dtype=bool), dims=("ping_time", "range_sample"), coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample}, ) # get upper and lower range indexes - up = np.argmin(abs(r - r0)) - lw = np.argmin(abs(r - r1)) + up = abs(r - r0).argmin(dim="range_sample").values + lw = abs(r - r1).argmin(dim="range_sample").values # get minimum range index admitted for processing - rmin = np.argmin(abs(r - roff)) + # rmin = abs(r - roff).argmin(dim="range_sample").values # get scaling factor index - sf = np.argmin(abs(r - jumps)) - # start masking process - mask_ = np.zeros(Sv.shape, dtype=bool) - mask = np.zeros(Sv.shape, dtype=bool) - for j in range(start, len(Sv)): - # mask where TN evaluation is unfeasible (e.g. edge issues, all-NANs) - if (j - n < 0) | (j + n > len(Sv) - 1) | np.all(np.isnan(Sv[j, up:lw])): - mask_[j, :] = True - # evaluate ping and block averages otherwise - - else: - pingmedian = _log(np.nanmedian(_lin(Sv[j, up:lw]))) - pingp75 = _log(np.nanpercentile(_lin(Sv[j, up:lw]), 75)) - blockmedian = _log(np.nanmedian(_lin(Sv[j - n : j + n, up:lw]))) - - # if ping median below 'maxts' permitted, and above enough from the - # block median, mask all the way up until noise disappears - if (pingp75 < maxts) & ((pingmedian - blockmedian) > thr[0]): - r0, r1 = lw - sf, lw - while r0 > rmin: - pingmedian = _log(np.nanmedian(_lin(Sv[j, r0:r1]))) - blockmedian = _log(np.nanmedian(_lin(Sv[j - n : j + n, r0:r1]))) - r0, r1 = r0 - sf, r1 - sf - if (pingmedian - blockmedian) < thr[1]: - break - mask[j, r0:] = True - - mask = mask[:, start:] | mask_[:, start:] - mask = np.logical_not(mask) - return_mask = xr.DataArray( - mask, + sf = abs(r - jumps).argmin(dim="range_sample").values + + range_mask = (Sv.range_sample >= up) & (Sv.range_sample <= lw) + Sv_range = Sv.where(range_mask, np.nan) + + # get columns in which no processing can be done - question, do we want to mask them out? 
+    nan_mask = Sv_range.isnull()
+    nan_mask = nan_mask.reduce(np.any, dim="range_sample")
+    nan_mask[0:n] = False
+    nan_mask[-n:] = False
+    """
+    nan_full_mask = xr.DataArray(
+        data=line_to_square(nan_mask, Sv, "range_sample").transpose(),
+        dims=Sv.dims,
+        coords=Sv.coords,
+    )
+    """
+
+    ping_median = _log(_lin(Sv_range).median(dim="range_sample", skipna=True))
+    ping_75q = _log(_lin(Sv_range).reduce(np.nanpercentile, q=75, dim="range_sample"))
+
+    block = Sv_range[:, up:lw]
+    block_list = [block.shift({"ping_time": i}) for i in range(-n, n)]
+    concat_block = xr.concat(block_list, dim="range_sample")
+    block_median = _log(_lin(concat_block).median(dim="range_sample", skipna=True))
+
+    # identify columns in which noise can be found
+    noise_column = (ping_75q < maxts) & ((ping_median - block_median) < thr[0])
+
+    noise_column_mask = xr.DataArray(
+        data=line_to_square(noise_column, Sv, "range_sample").transpose(),
+        dims=Sv.dims,
+        coords=Sv.coords,
+    )
+
+    # figure out how far noise extends
+    ping_median = _log(_lin(Sv_range).rolling(range_sample=sf).reduce(func=np.nanmedian))
+    block_median = _log(
+        _lin(Sv_range).rolling(range_sample=sf, ping_time=2 * n + 1).reduce(func=np.nanmedian)
+    )
+    height_mask = ping_median - block_median < thr[1]
+
+    height_noise_mask = height_mask | noise_column_mask
+
+    neg_mask = ~height_noise_mask
+
+    # propagate break upward
+    flipped_mask = neg_mask.isel(range_sample=slice(None, None, -1))
+    flipped_mask["range_sample"] = height_mask["range_sample"]
+    flipped_mask = ~flipped_mask
+    ft = len(flipped_mask.range_sample) - flipped_mask.argmax(dim="range_sample")
+
+    first_true_indices = xr.DataArray(
+        line_to_square(ft, flipped_mask, dim="range_sample").transpose(),
         dims=("ping_time", "range_sample"),
-        coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample},
+        coords={"ping_time": channel_Sv.ping_time, "range_sample": channel_Sv.range_sample},
     )
-    return return_mask
+
+    indices = xr.DataArray(
+        line_to_square(height_noise_mask["range_sample"], height_noise_mask, dim="ping_time"),
+        dims=("ping_time", "range_sample"),
+        coords={"ping_time": channel_Sv.ping_time, "range_sample": channel_Sv.range_sample},
+    )
+
+    noise_spike_mask = height_noise_mask.where(indices > first_true_indices, True)
+
+    mask = noise_spike_mask
+
+    # uncomment if we want to mask out the columns where no processing could be done,
+    # mask = nan_full_mask & noise_spike_mask
+    mask = mask.drop("channel")
+    return mask
diff --git a/echopype/echodata/sensor_ep_version_mapping/v05x_to_v06x.py b/echopype/echodata/sensor_ep_version_mapping/v05x_to_v06x.py
index 8053aa94a..a83d9f2bc 100644
--- a/echopype/echodata/sensor_ep_version_mapping/v05x_to_v06x.py
+++ b/echopype/echodata/sensor_ep_version_mapping/v05x_to_v06x.py
@@ -753,7 +753,7 @@ def _rename_and_add_time_vars_ek60(ed_obj):
        the variable time3, renames the variable ``water_level``
        time coordinate to time3, and changes ``ping_time`` to ``time2``
        for the variables ``pitch/roll/vertical_offset``.
-    2. For EK60's ``Envrionment`` group this function renames
+    2. For EK60's ``Environment`` group this function renames
        ``ping_time`` to ``time1``.
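The `line_to_square` helper from the new `utils/mask_transformation_xr.py` is used repeatedly above to tile a per-ping 1-D result across the sample dimension. A sketch of the assumed semantics (not the actual implementation):

import numpy as np
import xarray as xr

def line_to_square_sketch(line: xr.DataArray, da: xr.DataArray, dim: str) -> np.ndarray:
    # repeat the 1-D line once per element of da[dim];
    # callers in this patch transpose the result back to (ping_time, range_sample)
    return np.tile(line.values, (da.sizes[dim], 1))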
Parameters diff --git a/echopype/mask/seabed.py b/echopype/mask/seabed.py index 698295446..2a767082d 100644 --- a/echopype/mask/seabed.py +++ b/echopype/mask/seabed.py @@ -141,7 +141,7 @@ def _maxSv(Sv_ds: xr.DataArray, desired_channel: str, parameters: dict = MAX_SV_ maxSv[np.isnan(maxSv)] = -999 idx[maxSv < thr[0]] = 0 - # mask seabed, proceed only with acepted seabed indexes (!=0) + # mask seabed, proceed only with accepted seabed indexes (!=0) idx = idx mask = np.zeros(Sv.shape, dtype=bool) for j, i in enumerate(idx): @@ -221,7 +221,7 @@ def _deltaSv(Sv_ds: xr.DataArray, desired_channel: str, parameters: dict = MAX_S # get indexes for the first value above threshold, along every ping idx = np.nanargmax((Svdiff[r0:r1, :] > thr), axis=0) + r0 - # mask seabed, proceed only with acepted seabed indexes (!=0) + # mask seabed, proceed only with accepted seabed indexes (!=0) idx = idx mask = np.zeros(Sv.shape, dtype=bool) for j, i in enumerate(idx): @@ -331,9 +331,12 @@ def _blackwell(Sv_ds: xr.DataArray, desired_channel: str, parameters: dict = MAX below = np.zeros((len(r) - r1, maskchunk.shape[1]), dtype=bool) mask = np.r_[above, maskchunk, below] - # return empty mask if aliased-seabed was not detected in Theta & Phi + # give empty mask if aliased-seabed was not detected in Theta & Phi else: - warnings.warn("No aliased seabed detected in Theta & Phi. Returning empty mask.") + warnings.warn( + "No aliased seabed detected in Theta & Phi. " + "A default mask with all True values is returned." + ) mask = np.zeros_like(Sv, dtype=bool) mask = np.logical_not(mask.T) @@ -423,9 +426,12 @@ def _blackwell_mod( if r0 > r1: raise Exception("Minimum range has to be shorter than maximum range") - # return empty mask if searching range is outside the echosounder range + # give empty mask if searching range is outside the echosounder range if (r0 > r[-1]) or (r1 < r[0]): - warnings.warn("Search range is outside the echosounder range. Returning empty mask.") + warnings.warn( + "Search range is outside the echosounder range." + "A default mask with all True values is returned." + ) mask = np.zeros_like(Sv, dtype=bool) # delimit the analysis within user-defined range limits @@ -485,9 +491,12 @@ def _blackwell_mod( below = np.zeros((len(r) - i1, maskchunk.shape[1]), dtype=bool) mask = np.r_[above, maskchunk, below] - # return empty mask if aliased-seabed was not detected in Theta & Phi + # give empty mask if aliased-seabed was not detected in Theta & Phi else: - warnings.warn("Aliased seabed not detected in Theta & Phi. Returning empty mask.") + warnings.warn( + "Aliased seabed not detected in Theta & Phi." + "A default mask with all True values is returned." + ) mask = np.zeros_like(Sv, dtype=bool) mask = np.logical_not(mask.T) @@ -708,9 +717,12 @@ def _ariza(Sv_ds: xr.DataArray, desired_channel: str, parameters: dict = MAX_SV_ if r0 > r1: raise Exception("Minimum range has to be shorter than maximum range") - # return empty mask if searching range is outside the echosounder range + # give empty mask if searching range is outside the echosounder range if (r0 > r[-1]) or (r1 < r[0]): - warnings.warn("Search range is outside the echosounder range. Returning empty mask.") + warnings.warn( + "Search range is outside the echosounder range. " + "A default mask with all True values is returned." 
+ ) mask = np.zeros_like(Sv, dtype=bool) # get indexes for range offset and range limits @@ -723,9 +735,11 @@ def _ariza(Sv_ds: xr.DataArray, desired_channel: str, parameters: dict = MAX_SV_ Sv_[0:r0, :] = -999 Sv_[r1:, :] = -999 - # return empty mask if there is nothing above threshold + # give empty mask if there is nothing above threshold if not (Sv_ > thr).any(): - warnings.warn("Nothing found above the threshold. Returning empty mask.") + warnings.warn( + "Nothing found above the threshold. " "A default mask with all True values is returned." + ) mask = np.zeros_like(Sv_, dtype=bool) # search for seabed otherwise diff --git a/echopype/tests/calibrate/test_cal_params.py b/echopype/tests/calibrate/test_cal_params.py index aabfe2039..b6332438a 100644 --- a/echopype/tests/calibrate/test_cal_params.py +++ b/echopype/tests/calibrate/test_cal_params.py @@ -148,7 +148,7 @@ def test_param2da(p_val, channel, da_output): marks=pytest.mark.xfail(strict=True, reason="input sa_correction does not contain a 'channel' coordinate"), ), # input individual param: - # - with channel cooridinate but not identical to argin channel: fail with value error + # - with channel coordinate but not identical to argin channel: fail with value error pytest.param( "EK80", {"sa_correction": xr.DataArray([1, 1], dims=["channel"], coords={"channel": ["chA", "B"]})}, @@ -157,7 +157,7 @@ def test_param2da(p_val, channel, da_output): reason="input sa_correction contains a 'channel' coordinate but it is not identical with input channel"), ), # input individual param: - # - with channel cooridinate identical to argin channel: should pass + # - with channel coordinate identical to argin channel: should pass pytest.param( "EK80", {"sa_correction": xr.DataArray([1, 1], dims=["channel"], coords={"channel": ["chA", "chB"]})}, diff --git a/echopype/tests/clean/test_clean_impulse_noise.py b/echopype/tests/clean/test_clean_impulse_noise.py index 5bd55aa83..493b4bd03 100644 --- a/echopype/tests/clean/test_clean_impulse_noise.py +++ b/echopype/tests/clean/test_clean_impulse_noise.py @@ -16,9 +16,9 @@ @pytest.mark.parametrize( "method,parameters,desired_channel,desired_frequency,expected_true_false_counts", [ - ("ryan", RYAN_DEFAULT_PARAMS, DESIRED_CHANNEL, None, (2130885, 32419)), - ("ryan_iterable", RYAN_ITERABLE_DEFAULT_PARAMS, DESIRED_CHANNEL, None, (2124976, 38328)), - ("wang", WANG_DEFAULT_PARAMS, None, DESIRED_FREQUENCY, (635732, 1527572)), + ("ryan", RYAN_DEFAULT_PARAMS, DESIRED_CHANNEL, None, (2121702, 41602)), + ("ryan_iterable", RYAN_ITERABLE_DEFAULT_PARAMS, DESIRED_CHANNEL, None, (2108295, 55009)), + # ("wang", WANG_DEFAULT_PARAMS, None, DESIRED_FREQUENCY, (635732, 1527572)), ], ) def test_get_impulse_noise_mask( diff --git a/echopype/tests/clean/test_clean_transient_noise.py b/echopype/tests/clean/test_clean_transient_noise.py index 6c668b957..787e4d29d 100644 --- a/echopype/tests/clean/test_clean_transient_noise.py +++ b/echopype/tests/clean/test_clean_transient_noise.py @@ -5,15 +5,14 @@ import echopype.clean from echopype.clean.transient_noise import RYAN_DEFAULT_PARAMS, FIELDING_DEFAULT_PARAMS - # Note: We've removed all the setup and utility functions since they are now in conftest.py @pytest.mark.parametrize( "method, parameters ,expected_true_false_counts", [ - ("ryan", RYAN_DEFAULT_PARAMS, (1941916, 225015)), - ("fielding", FIELDING_DEFAULT_PARAMS, (1890033, 276898)), + ("ryan", RYAN_DEFAULT_PARAMS, (2115052, 51879)), + ("fielding", FIELDING_DEFAULT_PARAMS, (2117327, 49604)), ], ) def test_get_transient_mask( diff 
--git a/echopype/tests/clean/test_signal_attenuation.py b/echopype/tests/clean/test_signal_attenuation.py index 5ac1f1f3e..eec266d5d 100644 --- a/echopype/tests/clean/test_signal_attenuation.py +++ b/echopype/tests/clean/test_signal_attenuation.py @@ -4,14 +4,17 @@ DEFAULT_RYAN_PARAMS = {"r0": 180, "r1": 280, "n": 30, "thr": -6, "start": 0} -DEFAULT_ARIZA_PARAMS = {"offset": 20, "thr": (-40, -35), "m": 20, "n": 50} + +# commented ariza out since the current interpretation relies on a +# preexisting seabed mask, which is not available in this PR +# DEFAULT_ARIZA_PARAMS = {"offset": 20, "thr": (-40, -35), "m": 20, "n": 50} @pytest.mark.parametrize( "method,parameters,expected_true_false_counts", [ - ("ryan", DEFAULT_RYAN_PARAMS, (1613100, 553831)), - ("ariza", DEFAULT_ARIZA_PARAMS, (39897, 2127034)), + ("ryan", DEFAULT_RYAN_PARAMS, (1838934, 327997)), + # ("ariza", DEFAULT_ARIZA_PARAMS, (39897, 2127034)), ], ) def test_get_signal_attenuation_mask( @@ -28,4 +31,5 @@ def test_get_signal_attenuation_mask( count_true = np.count_nonzero(mask) count_false = mask.size - count_true true_false_counts = (count_true, count_false) + print(true_false_counts) assert true_false_counts == expected_true_false_counts diff --git a/echopype/tests/commongrid/conftest.py b/echopype/tests/commongrid/conftest.py index 9c30c504f..851e6dfaa 100644 --- a/echopype/tests/commongrid/conftest.py +++ b/echopype/tests/commongrid/conftest.py @@ -1,5 +1,525 @@ import pytest +import xarray as xr +import numpy as np +import pandas as pd + +from echopype.consolidate import add_depth +import echopype as ep + + +@pytest.fixture +def random_number_generator(): + """Random number generator for tests""" + return np.random.default_rng() + + +@pytest.fixture +def mock_nan_ilocs(): + """NaN i locations for irregular Sv dataset + + It's a list of tuples, each tuple contains + (channel, ping_time, range_sample) + + Notes + ----- + This was created with the following code: + + ``` + import numpy as np + + random_positions = [] + for i in range(20): + random_positions.append(( + np.random.randint(0, 2), + np.random.randint(0, 5), + np.random.randint(0, 20)) + ) + ``` + """ + return [ + (1, 1, 10), + (1, 0, 16), + (0, 3, 6), + (0, 2, 11), + (0, 2, 6), + (1, 1, 14), + (0, 1, 17), + (1, 4, 19), + (0, 3, 3), + (0, 0, 19), + (0, 1, 5), + (1, 2, 9), + (1, 4, 18), + (0, 1, 5), + (0, 4, 4), + (0, 1, 6), + (1, 2, 2), + (0, 1, 2), + (0, 4, 8), + (0, 1, 1), + ] + + +@pytest.fixture +def mock_parameters(): + """Small mock parameters""" + return { + "channel_len": 2, + "ping_time_len": 10, + "depth_len": 20, + "ping_time_interval": "0.3S", + } + + +@pytest.fixture +def mock_Sv_sample(mock_parameters): + """ + Mock Sv sample + + Dimension: (2, 10, 20) + """ + channel_len = mock_parameters["channel_len"] + ping_time_len = mock_parameters["ping_time_len"] + depth_len = mock_parameters["depth_len"] + + depth_data = np.linspace(0, 1, num=depth_len) + return np.tile(depth_data, (channel_len, ping_time_len, 1)) + + +@pytest.fixture +def mock_Sv_dataset_regular(mock_parameters, mock_Sv_sample): + ds_Sv = _gen_Sv_echo_range_regular(**mock_parameters, ping_time_jitter_max_ms=0) + ds_Sv["Sv"].data = mock_Sv_sample + return ds_Sv + + +@pytest.fixture +def mock_Sv_dataset_irregular(mock_parameters, mock_Sv_sample, mock_nan_ilocs): + depth_interval = [0.5, 0.32, 0.2] + depth_ping_time_len = [2, 3, 5] + ds_Sv = _gen_Sv_echo_range_irregular( + **mock_parameters, + depth_interval=depth_interval, + depth_ping_time_len=depth_ping_time_len, + ping_time_jitter_max_ms=30, # 
Added jitter to ping_time + ) + ds_Sv["Sv"].data = mock_Sv_sample + # Sprinkle nans around echo_range + for pos in mock_nan_ilocs: + ds_Sv["echo_range"][pos] = np.nan + return ds_Sv + + +@pytest.fixture +def mock_mvbs_inputs(): + return dict(range_meter_bin=2, ping_time_bin="1s") + + +@pytest.fixture +def mock_mvbs_array_regular(mock_Sv_dataset_regular, mock_mvbs_inputs, mock_parameters): + """ + Mock Sv sample result from compute_MVBS + + Dimension: (2, 3, 5) + Ping time bin: 1s + Range bin: 2m + """ + ds_Sv = mock_Sv_dataset_regular + ping_time_bin = mock_mvbs_inputs["ping_time_bin"] + range_bin = mock_mvbs_inputs["range_meter_bin"] + channel_len = mock_parameters["channel_len"] + expected_mvbs_val = _get_expected_mvbs_val( + ds_Sv, ping_time_bin, range_bin, channel_len + ) + + return expected_mvbs_val + + +@pytest.fixture +def mock_mvbs_array_irregular(mock_Sv_dataset_irregular, mock_mvbs_inputs, mock_parameters): + """ + Mock Sv sample irregular result from compute_MVBS + + Dimension: (2, 3, 5) + Ping time bin: 1s + Range bin: 2m + """ + ds_Sv = mock_Sv_dataset_irregular + ping_time_bin = mock_mvbs_inputs["ping_time_bin"] + range_bin = mock_mvbs_inputs["range_meter_bin"] + channel_len = mock_parameters["channel_len"] + expected_mvbs_val = _get_expected_mvbs_val( + ds_Sv, ping_time_bin, range_bin, channel_len + ) + + return expected_mvbs_val + + +@pytest.fixture( + params=[ + ( + ("EK60", "ncei-wcsd", "Summer2017-D20170719-T211347.raw"), + "EK60", + None, + {}, + ), + ( + ("EK80_NEW", "echopype-test-D20211004-T235930.raw"), + "EK80", + None, + {"waveform_mode": "BB", "encode_mode": "complex"}, + ), + ( + ("EK80_NEW", "D20211004-T233354.raw"), + "EK80", + None, + {"waveform_mode": "CW", "encode_mode": "power"}, + ), + ( + ("EK80_NEW", "D20211004-T233115.raw"), + "EK80", + None, + {"waveform_mode": "CW", "encode_mode": "complex"}, + ), + (("ES70", "D20151202-T020259.raw"), "ES70", None, {}), + (("AZFP", "17082117.01A"), "AZFP", ("AZFP", "17041823.XML"), {}), + ( + ("AD2CP", "raw", "090", "rawtest.090.00001.ad2cp"), + "AD2CP", + None, + {}, + ), + ], + ids=[ + "ek60_cw_power", + "ek80_bb_complex", + "ek80_cw_power", + "ek80_cw_complex", + "es70", + "azfp", + "ad2cp", + ], +) +def test_data_samples(request, test_path): + ( + filepath, + sonar_model, + azfp_xml_path, + range_kwargs, + ) = request.param + if sonar_model.lower() in ["es70", "ad2cp"]: + pytest.xfail( + reason="Not supported at the moment", + ) + path_model, *paths = filepath + filepath = test_path[path_model].joinpath(*paths) + + if azfp_xml_path is not None: + path_model, *paths = azfp_xml_path + azfp_xml_path = test_path[path_model].joinpath(*paths) + return ( + filepath, + sonar_model, + azfp_xml_path, + range_kwargs, + ) + + +@pytest.fixture +def regular_data_params(): + return { + "channel_len": 4, + "depth_len": 4000, + "ping_time_len": 100, + "ping_time_jitter_max_ms": 0, + } + + +@pytest.fixture +def ds_Sv_echo_range_regular(regular_data_params, random_number_generator): + return _gen_Sv_echo_range_regular( + **regular_data_params, + random_number_generator=random_number_generator, + ) + + +@pytest.fixture +def latlon_history_attr(): + return ( + "2023-08-31 12:00:00.000000 +00:00. " + "Interpolated or propagated from Platform latitude/longitude." 
# noqa
    )


@pytest.fixture
def lat_attrs(latlon_history_attr):
    """Latitude attributes"""
    return {
        "long_name": "Platform latitude",
        "standard_name": "latitude",
        "units": "degrees_north",
        "valid_range": "(-90.0, 90.0)",
        "history": latlon_history_attr,
    }


@pytest.fixture
def lon_attrs(latlon_history_attr):
    """Longitude attributes"""
    return {
        "long_name": "Platform longitude",
        "standard_name": "longitude",
        "units": "degrees_east",
        "valid_range": "(-180.0, 180.0)",
        "history": latlon_history_attr,
    }


@pytest.fixture
def depth_offset():
    """Depth offset for calculating depth"""
    return 2.5


@pytest.fixture
def ds_Sv_echo_range_regular_w_latlon(ds_Sv_echo_range_regular, lat_attrs, lon_attrs):
    """Sv dataset with latitude and longitude"""
    n_pings = ds_Sv_echo_range_regular.ping_time.shape[0]
    latitude = np.linspace(42, 43, num=n_pings)
    longitude = np.linspace(-124, -125, num=n_pings)

    ds_Sv_echo_range_regular["latitude"] = (["ping_time"], latitude, lat_attrs)
    ds_Sv_echo_range_regular["longitude"] = (["ping_time"], longitude, lon_attrs)

    # Need processing level code for compute MVBS to work!
    ds_Sv_echo_range_regular.attrs["processing_level"] = "Level 2A"
    return ds_Sv_echo_range_regular


@pytest.fixture
def ds_Sv_echo_range_regular_w_depth(ds_Sv_echo_range_regular, depth_offset):
    """Sv dataset with depth"""
    return ds_Sv_echo_range_regular.pipe(add_depth, depth_offset=depth_offset)


@pytest.fixture
def ds_Sv_echo_range_irregular(random_number_generator):
    depth_interval = [0.5, 0.32, 0.13]
    depth_ping_time_len = [100, 300, 200]
    ping_time_len = 600
    ping_time_interval = "0.3S"
    return _gen_Sv_echo_range_irregular(
        depth_interval=depth_interval,
        depth_ping_time_len=depth_ping_time_len,
        ping_time_len=ping_time_len,
        ping_time_interval=ping_time_interval,
        ping_time_jitter_max_ms=0,
        random_number_generator=random_number_generator,
    )


# Helper functions to generate mock Sv and MVBS dataset
def _get_expected_mvbs_val(
    ds_Sv: xr.Dataset, ping_time_bin: str, range_bin: float, channel_len: int = 2
) -> np.ndarray:
    """
    Helper function to generate expected MVBS outputs from a mock Sv dataset
    by brute-force looping and computing the mean

    Parameters
    ----------
    ds_Sv : xr.Dataset
        Mock Sv dataset
    ping_time_bin : str
        Ping time bin
    range_bin : float
        Range bin
    channel_len : int, default 2
        Number of channels
    """
    # create bin information needed for ping_time
    d_index = (
        ds_Sv["ping_time"]
        .resample(ping_time=ping_time_bin, skipna=True)
        .first()  # Not actually being used, but needed to get the bin groups
        .indexes["ping_time"]
    )
    ping_interval = d_index.union([d_index[-1] + pd.Timedelta(ping_time_bin)]).values

    # create bin information for echo_range
    # this computes the echo range max since there might be NaNs in the data
    echo_range_max = ds_Sv["echo_range"].max()
    range_interval = np.arange(0, echo_range_max + 2, range_bin)

    sv = ds_Sv["Sv"].pipe(ep.utils.compute._log2lin)

    expected_mvbs_val = np.ones((channel_len, len(ping_interval) - 1, len(range_interval) - 1)) * np.nan

    for ch_idx in range(channel_len):
        for p_idx in range(len(ping_interval) - 1):
            for r_idx in range(len(range_interval) - 1):
                echo_range = (
                    ds_Sv['echo_range']
                    .isel(channel=ch_idx)
                    .sel(ping_time=slice(ping_interval[p_idx], ping_interval[p_idx+1]))
                )
                r_idx_active = np.logical_and(
                    echo_range.data >= range_interval[r_idx],
                    echo_range.data < range_interval[r_idx+1]
                )
                sv_tmp = sv.isel(channel=ch_idx).sel(
                    ping_time=slice(ping_interval[p_idx], ping_interval[p_idx+1])).data[r_idx_active]
                if 0 in sv_tmp.shape:
                    expected_mvbs_val[ch_idx, p_idx, r_idx] = np.nan
                else:
                    expected_mvbs_val[ch_idx, p_idx, r_idx] = np.mean(sv_tmp)
    return ep.utils.compute._lin2log(expected_mvbs_val)


def _gen_ping_time(ping_time_len, ping_time_interval, ping_time_jitter_max_ms=0):
    ping_time = pd.date_range("2018-07-01", periods=ping_time_len, freq=ping_time_interval)
    if ping_time_jitter_max_ms != 0:  # whether to add jitter
        jitter = (
            np.random.randint(ping_time_jitter_max_ms, size=ping_time_len) / 1000
        )  # convert to seconds
        ping_time = pd.to_datetime(ping_time.astype(int) / 1e9 + jitter, unit="s")
    return ping_time


def _gen_Sv_echo_range_regular(
    channel_len=2,
    depth_len=100,
    depth_interval=0.5,
    ping_time_len=600,
    ping_time_interval="0.3S",
    ping_time_jitter_max_ms=0,
    random_number_generator=None,
):
    """
    Generate a Sv dataset with uniform echo_range across all ping_time.

    ping_time_jitter_max_ms controls the jitter (in milliseconds) in ping_time.

    Parameters
    ------------
    channel_len
        number of channels
    depth_len
        number of total depth bins
    depth_interval
        constant depth interval between adjacent samples
    ping_time_len
        total number of ping_time
    ping_time_interval
        interval between pings
    ping_time_jitter_max_ms
        jitter of ping_time in milliseconds
    """

    if random_number_generator is None:
        random_number_generator = np.random.default_rng()

    # regular echo_range
    echo_range = np.array([[np.arange(depth_len)] * ping_time_len] * channel_len) * depth_interval

    # generate dataset
    ds_Sv = xr.Dataset(
        data_vars={
            "Sv": (
                ["channel", "ping_time", "range_sample"],
                random_number_generator.random(size=(channel_len, ping_time_len, depth_len)),
            ),
            "echo_range": (["channel", "ping_time", "range_sample"], echo_range),
            "frequency_nominal": (["channel"], np.arange(channel_len)),
        },
        coords={
            "channel": [f"ch_{ch}" for ch in range(channel_len)],
            "ping_time": _gen_ping_time(ping_time_len, ping_time_interval, ping_time_jitter_max_ms),
            "range_sample": np.arange(depth_len),
        },
    )

    return ds_Sv


def _gen_Sv_echo_range_irregular(
    channel_len=2,
    depth_len=100,
    depth_interval=[0.5, 0.32, 0.13],
    depth_ping_time_len=[100, 300, 200],
    ping_time_len=600,
    ping_time_interval="0.3S",
    ping_time_jitter_max_ms=0,
    random_number_generator=None,
):
    """
    Generate a Sv dataset with irregular echo_range across blocks of ping_time.

    ping_time_jitter_max_ms controls the jitter (in milliseconds) in ping_time.

    Parameters
    ------------
    channel_len
        number of channels
    depth_len
        number of total depth bins
    depth_interval
        depth intervals, may have multiple values
    depth_ping_time_len
        the number of pings to use each of the depth_interval
        for example, with depth_interval=[0.5, 0.32, 0.13]
        and depth_ping_time_len=[100, 300, 200],
        the first 100 pings have echo_range with depth intervals of 0.5 m,
        the next 300 pings have echo_range with depth intervals of 0.32 m,
        and the last 200 pings have echo_range with depth intervals of 0.13 m.
+    ping_time_len
+        total number of ping_time
+    ping_time_interval
+        interval between pings
+    ping_time_jitter_max_ms
+        jitter of ping_time in milliseconds
+    """
+    if random_number_generator is None:
+        random_number_generator = np.random.default_rng()
+
+    # check input
+    if len(depth_interval) != len(depth_ping_time_len):
+        raise ValueError("depth_interval and depth_ping_time_len must have the same length!")
+
+    if ping_time_len != np.array(depth_ping_time_len).sum():
+        raise ValueError("ping_time_len must equal the sum of depth_ping_time_len!")
+
+    # irregular echo_range
+    echo_range_list = []
+    for d, dp in zip(depth_interval, depth_ping_time_len):
+        echo_range_list.append(np.array([[np.arange(depth_len)] * dp] * channel_len) * d)
+    echo_range = np.hstack(echo_range_list)
+
+    # generate dataset
+    ds_Sv = xr.Dataset(
+        data_vars={
+            "Sv": (
+                ["channel", "ping_time", "range_sample"],
+                random_number_generator.random(size=(channel_len, ping_time_len, depth_len)),
+            ),
+            "echo_range": (["channel", "ping_time", "range_sample"], echo_range),
+            "frequency_nominal": (["channel"], np.arange(channel_len)),
+        },
+        coords={
+            "channel": [f"ch_{ch}" for ch in range(channel_len)],
+            "ping_time": _gen_ping_time(ping_time_len, ping_time_interval, ping_time_jitter_max_ms),
+            "range_sample": np.arange(depth_len),
+        },
+    )
+
+    return ds_Sv
+
+
+# End helper functions
+
+import pytest
+
 import xarray as xr
 import numpy as np
 import pandas as pd
diff --git a/echopype/tests/utils/test_utils_io.py b/echopype/tests/utils/test_utils_io.py
index 0fa3db6ea..2bc6c2e7e 100644
--- a/echopype/tests/utils/test_utils_io.py
+++ b/echopype/tests/utils/test_utils_io.py
@@ -12,7 +12,7 @@
     validate_output_path,
     env_indep_joinpath,
     validate_source_ds_da,
-    init_ep_dir
+    init_ep_dir,
 )
 
 import echopype.utils.io
@@ -20,30 +20,30 @@
 @pytest.mark.parametrize(
     "file_path, should_fail, file_type",
     [
-        ('https://example.com/test.nc', True, 'nc'),
-        ('https://example.com/test.zarr', False, 'zarr'),
-        (os.path.join('folder', 'test.nc'), False, 'nc'),
-        (os.path.join('folder', 'test.zarr'), False, 'zarr'),
-        (Path('https:/example.com/test.nc'), True, 'nc'),
-        (Path('https:/example.com/test.zarr'), True, 'zarr'),
-        (Path('folder/test.nc'), False, 'nc'),
-        (Path('folder/test.zarr'), False, 'zarr'),
-        (fsspec.get_mapper('https://example.com/test.nc'), True, 'nc'),
-        (fsspec.get_mapper('https:/example.com/test.zarr'), False, 'zarr'),
-        (fsspec.get_mapper('folder/test.nc'), False, 'nc'),
-        (fsspec.get_mapper('folder/test.zarr'), False, 'zarr'),
-        ('https://example.com/test.jpeg', True, 'jpeg'),
-        (Path('https://example.com/test.jpeg'), True, 'jpeg'),
-        (fsspec.get_mapper('https://example.com/test.jpeg'), True, 'jpeg'),
+        ("https://example.com/test.nc", True, "nc"),
+        ("https://example.com/test.zarr", False, "zarr"),
+        (os.path.join("folder", "test.nc"), False, "nc"),
+        (os.path.join("folder", "test.zarr"), False, "zarr"),
+        (Path("https:/example.com/test.nc"), True, "nc"),
+        (Path("https:/example.com/test.zarr"), True, "zarr"),
+        (Path("folder/test.nc"), False, "nc"),
+        (Path("folder/test.zarr"), False, "zarr"),
+        (fsspec.get_mapper("https://example.com/test.nc"), True, "nc"),
+        (fsspec.get_mapper("https:/example.com/test.zarr"), False, "zarr"),
+        (fsspec.get_mapper("folder/test.nc"), False, "nc"),
+        (fsspec.get_mapper("folder/test.zarr"), False, "zarr"),
+        ("https://example.com/test.jpeg", True, "jpeg"),
+        (Path("https://example.com/test.jpeg"), True, "jpeg"),
+        (fsspec.get_mapper("https://example.com/test.jpeg"), True, "jpeg"),
     ],
 )
 def 
test_sanitize_file_path(file_path, should_fail, file_type): try: sanitized = sanitize_file_path(file_path) if not should_fail: - if file_type == 'nc': + if file_type == "nc": assert isinstance(sanitized, Path) is True - elif file_type == 'zarr': + elif file_type == "zarr": assert isinstance(sanitized, fsspec.FSMap) is True except Exception as e: assert isinstance(e, ValueError) is True @@ -53,41 +53,41 @@ def test_sanitize_file_path(file_path, should_fail, file_type): "save_path, engine", [ # Netcdf tests - (os.path.join('folder', 'new_test.nc'), 'netcdf4'), - (os.path.join('folder', 'new_test.nc'), 'zarr'), - (os.path.join('folder', 'path', 'new_test.nc'), 'netcdf4'), - ('folder/', 'netcdf4'), - ('s3://ooi-raw-data/', 'netcdf4'), - (Path('folder/'), 'netcdf4'), - (Path('folder/new_test.nc'), 'netcdf4'), + (os.path.join("folder", "new_test.nc"), "netcdf4"), + (os.path.join("folder", "new_test.nc"), "zarr"), + (os.path.join("folder", "path", "new_test.nc"), "netcdf4"), + ("folder/", "netcdf4"), + ("s3://ooi-raw-data/", "netcdf4"), + (Path("folder/"), "netcdf4"), + (Path("folder/new_test.nc"), "netcdf4"), # Zarr tests - (os.path.join('folder', 'new_test.zarr'), 'zarr'), - (os.path.join('folder', 'new_test.zarr'), 'netcdf4'), - (os.path.join('folder', 'path', 'new_test.zarr'), 'zarr'), - ('folder/', 'zarr'), + (os.path.join("folder", "new_test.zarr"), "zarr"), + (os.path.join("folder", "new_test.zarr"), "netcdf4"), + (os.path.join("folder", "path", "new_test.zarr"), "zarr"), + ("folder/", "zarr"), # Empty tests - (None, 'netcdf4'), - (None, 'zarr'), + (None, "netcdf4"), + (None, "zarr"), # Remotes - ('https://example.com/test.zarr', 'zarr'), - ('https://example.com/', 'zarr'), - ('https://example.com/test.nc', 'netcdf4'), - ('s3://ooi-raw-data/new_test.zarr', 'zarr'), - ('s3://ooi-raw-data/new_test.nc', 'netcdf4'), + ("https://example.com/test.zarr", "zarr"), + ("https://example.com/", "zarr"), + ("https://example.com/test.nc", "netcdf4"), + ("s3://ooi-raw-data/new_test.zarr", "zarr"), + ("s3://ooi-raw-data/new_test.nc", "netcdf4"), ], ) def test_validate_output_path(save_path, engine, minio_bucket): - output_root_path = os.path.join('.', 'echopype', 'test_data', 'dump') - source_file = 'test.raw' - if engine == 'netcdf4': - ext = '.nc' + output_root_path = os.path.join(".", "echopype", "test_data", "dump") + source_file = "test.raw" + if engine == "netcdf4": + ext = ".nc" else: - ext = '.zarr' + ext = ".zarr" if save_path is not None: - if '://' not in str(save_path): + if "://" not in str(save_path): save_path = os.path.join(output_root_path, save_path) - is_dir = True if Path(save_path).suffix == '' else False + is_dir = True if Path(save_path).suffix == "" else False else: is_dir = True save_path = output_root_path @@ -101,31 +101,29 @@ def test_validate_output_path(save_path, engine, minio_bucket): ) try: - output_path = validate_output_path( - source_file, engine, output_storage_options, save_path - ) + output_path = validate_output_path(source_file, engine, output_storage_options, save_path) assert isinstance(output_path, str) is True assert Path(output_path).suffix == ext if is_dir: - assert Path(output_path).name == source_file.replace('.raw', '') + ext + assert Path(output_path).name == source_file.replace(".raw", "") + ext else: output_file = Path(save_path) - assert Path(output_path).name == output_file.name.replace(output_file.suffix, '') + ext + assert Path(output_path).name == output_file.name.replace(output_file.suffix, "") + ext except Exception as e: - if 'https://' in 
save_path: - if save_path == 'https://example.com/': + if "https://" in save_path: + if save_path == "https://example.com/": assert isinstance(e, ValueError) is True - assert str(e) == 'Input file type not supported!' - elif save_path == 'https://example.com/test.nc': + assert str(e) == "Input file type not supported!" + elif save_path == "https://example.com/test.nc": assert isinstance(e, ValueError) is True - assert str(e) == 'Only local netcdf4 is supported.' + assert str(e) == "Only local netcdf4 is supported." else: assert isinstance(e, PermissionError) is True - elif save_path == 's3://ooi-raw-data/new_test.nc': + elif save_path == "s3://ooi-raw-data/new_test.nc": assert isinstance(e, ValueError) is True - assert str(e) == 'Only local netcdf4 is supported.' + assert str(e) == "Only local netcdf4 is supported." def mock_windows_return(*args: Tuple[str, ...]): @@ -176,9 +174,11 @@ def mock_unix_return(*args: Tuple[str, ...]): (r"C:\folder", True, False), (r"s3://folder", False, True), (r"s3://folder", True, True), - ] + ], ) -def test_env_indep_joinpath_mock_return(save_path: str, is_windows: bool, is_cloud: bool, monkeypatch): +def test_env_indep_joinpath_mock_return( + save_path: str, is_windows: bool, is_cloud: bool, monkeypatch +): """ Tests the function ``env_indep_joinpath`` using a mock return on varying OS and cloud path scenarios by adding a folder and a file to the input ``save_path``. @@ -202,9 +202,9 @@ def test_env_indep_joinpath_mock_return(save_path: str, is_windows: bool, is_clo # assign the appropriate mock return for os.path.join if is_windows: - monkeypatch.setattr(os.path, 'join', mock_windows_return) + monkeypatch.setattr(os.path, "join", mock_windows_return) else: - monkeypatch.setattr(os.path, 'join', mock_unix_return) + monkeypatch.setattr(os.path, "join", mock_unix_return) # add folder and file to path joined_path = env_indep_joinpath(save_path, "output", "data.zarr") @@ -222,7 +222,7 @@ def test_env_indep_joinpath_mock_return(save_path: str, is_windows: bool, is_clo (r"C:\root\folder", True, False), (r"s3://root/folder", False, True), (r"s3://root/folder", True, True), - ] + ], ) def test_env_indep_joinpath_os_dependent(save_path: str, is_windows: bool, is_cloud: bool): """ @@ -253,14 +253,12 @@ def test_env_indep_joinpath_os_dependent(save_path: str, is_windows: bool, is_cl assert joined_path == r"s3://root/folder/output/data.zarr" elif is_windows: - if platform.system() == "Windows": assert joined_path == r"C:\root\folder\output\data.zarr" else: pytest.skip("Skipping Windows parameters because we are not on a Windows machine.") else: - if platform.system() != "Windows": assert joined_path == r"/root/folder/output/data.zarr" else: @@ -270,22 +268,29 @@ def test_env_indep_joinpath_os_dependent(save_path: str, is_windows: bool, is_cl @pytest.mark.parametrize( ("source_ds_da_input", "storage_options_input", "true_file_type"), [ - pytest.param(42, {}, None, - marks=pytest.mark.xfail( - strict=True, - reason='This test should fail because source_ds is not of the correct type.') - ), + pytest.param( + 42, + {}, + None, + marks=pytest.mark.xfail( + strict=True, + reason="This test should fail because source_ds is not of the correct type.", + ), + ), pytest.param(xr.DataArray(), {}, None), - pytest.param({}, 42, None, - marks=pytest.mark.xfail( - strict=True, - reason='This test should fail because storage_options is not of the correct type.') - ), + pytest.param( + {}, + 42, + None, + marks=pytest.mark.xfail( + strict=True, + reason="This test should fail because 
storage_options is not of the correct type.", + ), + ), (xr.Dataset(attrs={"test": 42}), {}, None), - (os.path.join('folder', 'new_test.nc'), {}, 'netcdf4'), - (os.path.join('folder', 'new_test.zarr'), {}, 'zarr') - ] - + (os.path.join("folder", "new_test.nc"), {}, "netcdf4"), + (os.path.join("folder", "new_test.zarr"), {}, "zarr"), + ], ) def test_validate_source_ds_da(source_ds_da_input, storage_options_input, true_file_type): """ @@ -294,7 +299,9 @@ def test_validate_source_ds_da(source_ds_da_input, storage_options_input, true_f are tested in ``test_validate_output_path`` and are therefore not included here. """ - source_ds_output, file_type_output = validate_source_ds_da(source_ds_da_input, storage_options_input) + source_ds_output, file_type_output = validate_source_ds_da( + source_ds_da_input, storage_options_input + ) if isinstance(source_ds_da_input, (xr.Dataset, xr.DataArray)): assert source_ds_output.identical(source_ds_da_input) @@ -303,6 +310,7 @@ def test_validate_source_ds_da(source_ds_da_input, storage_options_input, true_f assert isinstance(source_ds_output, str) assert file_type_output == true_file_type + def test_init_ep_dir(monkeypatch): temp_user_dir = tempfile.TemporaryDirectory() echopype_dir = Path(temp_user_dir.name) / ".echopype" @@ -319,3 +327,8 @@ def test_init_ep_dir(monkeypatch): assert echopype.utils.io.ECHOPYPE_DIR.exists() is True temp_user_dir.cleanup() + + +def test_get_dataset(sv_dataset_jr161): + gd = echopype.utils.io.get_dataset(sv_dataset_jr161) + assert gd == sv_dataset_jr161 diff --git a/echopype/tests/utils/test_utils_mask_transformation_xr.py b/echopype/tests/utils/test_utils_mask_transformation_xr.py new file mode 100644 index 000000000..d103f2f33 --- /dev/null +++ b/echopype/tests/utils/test_utils_mask_transformation_xr.py @@ -0,0 +1,122 @@ +import numpy as np +import pytest +import echopype.utils.mask_transformation_xr as ep +import xarray as xr + + +def test_lin(): + # Prepare input and expected output + variable = xr.DataArray([10, 20, 30]) + expected_output = xr.DataArray([10, 100, 1000]) + + # Apply function + output = ep.lin(variable) + + # Assert output is as expected + xr.testing.assert_equal(output, expected_output) + + +def test_log(): + # Prepare input and expected output + variable = xr.DataArray([10, 100, 0, -10]) + expected_output = xr.DataArray([10, 20, -999, -999]) + + # Apply function + output = ep.log(variable) + # Assert output is as expected + truth = output == expected_output + assert truth.all() + # Test with a single positive number + assert ep.log(10) == 10 * np.log10(10) + + # Test with a single negative number (should return -999) + assert ep.log(-10) == -999 + + # Test with zero (should return -999) + assert ep.log(0) == -999 + + # Test with a list of numbers + truth = ep.log([10, 20, -10, 0]) == xr.DataArray( + [ + 10 * np.log10(10), + 10 * np.log10(20), + -999, + -999, + ] + ) + assert truth.all() + + # Test with an integer numpy array + int_array = xr.DataArray([10, 20, -10, 0]) + truth = ep.log(int_array) == xr.DataArray([10 * np.log10(10), 10 * np.log10(20), -999, -999]) + assert truth.all() + + # Test with a single number in a numpy array + assert ep.log(np.array([10])) == 10 * np.log10(10) + + # Test with a single negative number in a numpy array (should return -999) + assert ep.log(np.array([-10])) == -999 + + # Test with a single zero in a numpy array (should return -999) + assert ep.log(np.array([0])) == -999 + + +def test_downsample_exceptions(): + data = np.arange(24).reshape(4, 6) + dims = ["x", "y"] + 
coords = {"x": np.arange(4), "y": np.arange(6)} + dataset = xr.DataArray(data=data, dims=dims, coords=coords) + + with pytest.raises(Exception, match="Operation not in approved list"): + ep.downsample(dataset, {"x": 2}, "kind") + with pytest.raises(Exception, match="Coordinate z not in dataset coordinates"): + ep.downsample(dataset, {"z": 2}, "mean") + + +@pytest.mark.parametrize( + "coordinates,operation,is_log,shape,value", + [ + ({"range_sample": 2}, "mean", False, (3, 572, 1891), -10.82763365585262), + ({"range_sample": 2, "ping_time": 2}, "mean", False, (3, 286, 1891), -11.018715656585043), + ({"range_sample": 2}, "sum", False, (3, 572, 1891), -21.65526731170524), + ({"range_sample": 2}, "mean", True, (3, 572, 1891), -10.825779607785), + ], +) +def test_downsample(sv_dataset_jr230, coordinates, operation, is_log, shape, value): + source_Sv = sv_dataset_jr230.copy(deep=True)["Sv"] + res = ep.downsample(source_Sv, coordinates, operation, is_log) + assert res.values.shape == shape + assert res.values[-1, -1, -1] == value + + +def test_upsample(): + data = np.array([[3.5, 4.5, 5.5, 6.5, 7.5], [13.5, 14.5, 15.5, 16.5, 17.5]]) + data_2 = np.arange(25).reshape(5, 5) + data_3 = np.array( + [ + [3.5, 4.5, 5.5, 6.5, 7.5], + [3.5, 4.5, 5.5, 6.5, 7.5], + [13.5, 14.5, 15.5, 16.5, 17.5], + [13.5, 14.5, 15.5, 16.5, 17.5], + [np.nan, np.nan, np.nan, np.nan, np.nan], + ] + ) + dims = ["x", "y"] + coords_1 = {"x": [1, 4], "y": [1, 3, 5, 7, 9]} + coords_2 = {"x": [1, 2, 3, 4, 5], "y": [1, 3, 5, 7, 9]} + ds_1 = xr.DataArray(data=data, dims=dims, coords=coords_1) + ds_2 = xr.DataArray(data=data_2, dims=dims, coords=coords_2) + ds_3 = xr.DataArray(data=data_3, dims=dims, coords=coords_2) + ds_4 = ep.upsample(ds_1, ds_2) + assert ds_3.equals(ds_4) + + +def test_line_to_square(): + row = [False, False, True, False] + one = xr.DataArray(data=[row], dims=["x", "y"], coords={"x": [1], "y": [1, 2, 3, 4]}) + two = xr.DataArray( + data=[row, row, row], dims=["x", "y"], coords={"x": [1, 2, 3], "y": [1, 2, 3, 4]} + ) + res = ep.line_to_square(one, two, dim="x") + print(res) + assert res.shape == two.shape diff --git a/echopype/utils/mask_transformation_xr.py b/echopype/utils/mask_transformation_xr.py new file mode 100644 index 000000000..968eeacc2 --- /dev/null +++ b/echopype/utils/mask_transformation_xr.py @@ -0,0 +1,115 @@ +import numpy as np +import xarray as xr + + +def lin(db: xr.DataArray) -> xr.DataArray: + """Convert decibel to linear scale, handling NaN values.""" + linear = xr.where(db.isnull(), np.nan, 10 ** (db / 10)) + return linear + + +def log(linear: xr.DataArray) -> xr.DataArray: + """ + Turn variable into the logarithmic domain. This function will return -999 + in the case of values less or equal to zero (undefined logarithm). -999 is + the convention for empty water or vacant sample in fisheries acoustics. + + Args: + variable (float): array of elements to be transformed. 
+
+    Returns:
+        xr.DataArray: array of transformed elements (list inputs come back as
+        numpy arrays, scalar inputs as scalars)
+    """
+    back_list = False
+    back_single = False
+    if not isinstance(linear, xr.DataArray):
+        if isinstance(linear, list):
+            linear = xr.DataArray(linear)
+            back_list = True
+        else:
+            linear = xr.DataArray([linear])
+            back_single = True
+
+    db = xr.apply_ufunc(lambda x: 10 * np.log10(x), linear)
+    db = xr.where(db.isnull(), -999, db)
+    db = xr.where(linear == 0, -999, db)
+    if back_list:
+        db = db.values
+    if back_single:
+        db = db.values[0]
+    return db
+
+
+def downsample(
+    dataset: xr.DataArray,
+    coordinates: Dict[str, int],
+    operation: str = "mean",
+    is_log: bool = False,
+):
+    """
+    Given a dataset, downsamples it on the specified coordinates
+
+    Args:
+        dataset (xr.DataArray): the dataset to resample
+        coordinates (Dict[str, int]): a mapping of dimension name to window size
+        operation (str): the downsample operation to use ("mean" or "sum")
+        is_log (bool): True if the data is logarithmic and should be
+            converted to linear before resampling
+
+    Returns:
+        xr.DataArray: the resampled dataset
+    """
+    operation_list = ["mean", "sum"]
+    if operation not in operation_list:
+        raise Exception("Operation not in approved list")
+    for k in coordinates.keys():
+        if k not in dataset.dims:
+            raise Exception("Coordinate " + k + " not in dataset coordinates")
+    if is_log:
+        dataset = lin(dataset)
+    if operation == "mean":
+        dataset = dataset.coarsen(coordinates, boundary="pad").mean()
+    elif operation == "sum":
+        dataset = dataset.coarsen(coordinates, boundary="pad").sum()
+    if is_log:
+        dataset = log(dataset)
+    return dataset
+
+
+def upsample(dataset: xr.DataArray, dataset_size: xr.DataArray):
+    """
+    Given a data dataset and an example dataset, upsamples the data dataset
+    to the example dataset's dimensions by repeating values
+
+    Args:
+        dataset (xr.DataArray): data
+        dataset_size (xr.DataArray): dataset of the right size
+
+    Returns:
+        xr.DataArray: the input dataset, with the same coords as dataset_size
+        and the values repeated to fill it up; positions outside the range of
+        the input coordinates are filled with NaN
+    """
+    interpolated = dataset.interp_like(dataset_size, method="nearest")
+    return interpolated
+
+
+def line_to_square(one: xr.DataArray, two: xr.DataArray, dim: str):
+    """
+    Given a single-dimension dataset and an example dataset with 2 dimensions,
+    returns a two-dimensional array in which the single-dimension dataset is
+    repeated as often as needed
+
+    Args:
+        one (xr.DataArray): data
+        two (xr.DataArray): shape dataset
+        dim (str): name of dimension to concat against
+
+    Returns:
+        np.ndarray: the values of ``one`` repeated along ``dim`` to match the
+        shape of ``two``
+    """
+    length = len(two[dim])
+    array_list = [one for _ in range(0, length)]
+    array = xr.concat(array_list, dim=dim)
+    return array.values
diff --git a/requirements.txt b/requirements.txt
index c33c3c5ea..fcdbccde7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,6 @@ aiohttp
 xarray-datatree==0.0.6
 psutil>=5.9.1
 geopy
+# scikit-image
 flox>=0.7.2,<1.0.0
-scikit-image
 charset-normalizer<3.2
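For orientation, the conftest helpers above are meant to be used as a pair: a mock Sv dataset from _gen_Sv_echo_range_regular feeds both compute_MVBS and the brute-force expectation from _get_expected_mvbs_val. A minimal sketch of that pairing, assuming the helpers are importable from the conftest module and that compute_MVBS takes range_bin/ping_time_bin keywords as in echopype 0.8:

import numpy as np
import echopype as ep
# hypothetical import path; these helpers live in echopype/tests/commongrid/conftest.py
from echopype.tests.commongrid.conftest import (
    _gen_Sv_echo_range_regular,
    _get_expected_mvbs_val,
)

ds_Sv = _gen_Sv_echo_range_regular(ping_time_len=600, ping_time_interval="0.3S")
ds_Sv.attrs["processing_level"] = "Level 2A"  # compute_MVBS needs a processing level code

expected = _get_expected_mvbs_val(ds_Sv, ping_time_bin="1S", range_bin=5.0)
mvbs = ep.commongrid.compute_MVBS(ds_Sv, range_bin="5m", ping_time_bin="1S")

# The binned implementation and the brute-force loop should agree bin by bin
np.testing.assert_allclose(mvbs["Sv"].values, expected)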
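The env_indep_joinpath tests above encode the function's contract: cloud paths are always joined with forward slashes, while local paths use the host OS separator. A tiny illustration of the cloud case, which is OS-independent:

from echopype.utils.io import env_indep_joinpath

# Cloud paths keep "/" as the separator regardless of the host OS
joined = env_indep_joinpath("s3://root/folder", "output", "data.zarr")
assert joined == "s3://root/folder/output/data.zarr"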
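Similarly, test_validate_source_ds_da above pins down the two happy paths of validate_source_ds_da: in-memory objects pass through, and path-like inputs come back as strings with an inferred file type. A short sketch of both:

import xarray as xr
from echopype.utils.io import validate_source_ds_da

# An in-memory Dataset passes through unchanged, with no file type
ds, file_type = validate_source_ds_da(xr.Dataset(attrs={"test": 42}), {})
assert ds.attrs["test"] == 42 and file_type is None

# A path-like input comes back as a string plus "netcdf4" or "zarr"
path, file_type = validate_source_ds_da("folder/new_test.zarr", {})
assert isinstance(path, str) and file_type == "zarr"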
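The -999 convention in log above can be exercised directly. A small worked example (values chosen only for illustration):

import numpy as np
import xarray as xr
from echopype.utils.mask_transformation_xr import lin, log

sv_db = xr.DataArray([-60.0, -70.0, 0.0])
sv_linear = lin(sv_db)  # 10 ** (dB / 10): [1e-6, 1e-7, 1.0]
assert np.allclose(log(sv_linear).values, sv_db.values)  # round trip in dB

# Non-positive values have no logarithm; log() maps them to -999, the
# fisheries-acoustics convention for empty water / vacant samples
assert log(0) == -999
assert log(-10) == -999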
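downsample and upsample above are written to be used as a pair: coarsen along named dimensions (optionally through the linear domain), then re-expand onto the original grid with nearest-neighbour interp_like. A sketch on toy data:

import numpy as np
import xarray as xr
from echopype.utils.mask_transformation_xr import downsample, upsample

sv = xr.DataArray(
    np.arange(24.0).reshape(4, 6),
    dims=["ping_time", "range_sample"],
    coords={"ping_time": np.arange(4), "range_sample": np.arange(6)},
)

# Pairwise mean along range_sample: (4, 6) -> (4, 3)
coarse = downsample(sv, {"range_sample": 2}, operation="mean")
assert coarse.shape == (4, 3)

# Nearest-neighbour re-expansion onto sv's grid; positions outside the
# coarse coordinate range become NaN, as in test_upsample above
fine = upsample(coarse, sv)
assert fine.shape == sv.shape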
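line_to_square tiles a one-dimensional mask across a second dimension by concatenation and hands back the raw values rather than a labelled DataArray. A sketch mirroring the unit test above:

import xarray as xr
from echopype.utils.mask_transformation_xr import line_to_square

row = xr.DataArray([False, True, False], dims=["y"], coords={"y": [1, 2, 3]})
template = xr.DataArray(
    [[0, 0, 0], [0, 0, 0]], dims=["x", "y"], coords={"x": [1, 2], "y": [1, 2, 3]}
)

# row is repeated len(template["x"]) times along the new "x" dimension
tiled = line_to_square(row, template, dim="x")
assert tiled.shape == template.shape  # plain numpy array of shape (2, 3)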