Krill inversion component sketch #321
base: main
@@ -0,0 +1,98 @@

from pydantic import BaseModel
from pandera import DataFrameModel
from lmfit import Parameters
from typing import Any, Dict, List, Union

import pandas as pd
from pathlib import Path


####################################################################################################
# Validation / preparation
####################################################################################################
class inversion_configuration_validator(BaseModel):
    """
    Pydantic model for validating configuration parameters
    """

    # RETURNS: Dict[str, Any]
    pass


class dataset_validator(DataFrameModel):
    """
    Pandera model for validating dataset values
    """

    # RETURNS: pd.DataFrame
    pass

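# A minimal sketch of what the two validators above might eventually look like. The field and
# column names below are hypothetical placeholders, not part of this PR.
import pandera as pa
from pandera.typing import Series


class _ExampleInversionConfiguration(BaseModel):
    # hypothetical configuration fields
    sound_speed: float
    frequency_kHz: List[float]


class _ExampleDatasetSchema(pa.DataFrameModel):
    # hypothetical dataset columns
    frequency: Series[float] = pa.Field(gt=0)
    Sv_mean: Series[float]
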
def prepare_scattering_model_inputs(scattering_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare scattering model parameter inputs
    """
    # == functions/set_para.m
    # == functions/inversion_para.m

    # PHASE 1) INGEST VALUES FROM CONFIGURATION FILE
    # PHASE 2) VALIDATE USING `inversion_configuration_validator`
    # PHASE 3) COMPUTE INTERMEDIATE VARIABLES (e.g. acoustic wavenumber, position matrix)
    # PHASE 4) PASS TO SCATTERER CLASS
    # --> EXTERNAL TO THIS FUNCTION

    # RETURNS: Validated scattering model inputs
    pass

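# A small worked sketch of one of the PHASE 3 intermediate variables named above: the acoustic
# wavenumber k = 2 * pi * f / c. The argument names and default sound speed are assumptions, not
# part of this PR.
import numpy as np


def _acoustic_wavenumber(frequency_Hz, sound_speed_m_s=1500.0):
    # k = 2*pi*f / c, broadcast over an array of frequencies
    return 2.0 * np.pi * np.asarray(frequency_Hz) / sound_speed_m_s
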
def prepare_dataset(dataset: pd.DataFrame) -> Dict[str, Any]:
    """
    Prepare dataset inputs
    """

    # PHASE 1) INGEST DATASET (*.xlsx)
    # PHASE 2) VALIDATE USING `dataset_validator`
    # PHASE 3) PARTITION DATASET BASED ON DIFFERENT ECHOMETRICS (e.g. mean Sv, median Sv)

    # RETURNS: Validated dataset DataFrame objects used for inversion
    pass

def prepare_inversion_settings(inversion_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare inversion configuration and parameterization
    """

    # PHASE 1) INGEST VALUES FROM CONFIGURATION FILE
    # PHASE 2) VALIDATE USING `inversion_configuration_validator`
    # PHASE 3) COMPUTE INTERMEDIATE VARIABLES (e.g. acoustic wavenumber, position matrix)

    # RETURNS: Validated inversion and optimization parameters
    pass


####################################################################################################
# Data ingestion
####################################################################################################
def yaml_configuration_reader(
    config_file: Union[str, Path],
) -> Dict[str, Union[float, int, Parameters, pd.DataFrame, str]]:
    """
    Read and validate the input parameterization YAML configuration
    """
    # == functions/load_para_data.m
    # == functions/load_geo_phy_para.m
    # == functions/get_simu_para.m

    # PHASE 1) READ CONFIGURATION FILE

    # RETURNS: Raw Dict[str, Any]
    pass

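# A minimal sketch of PHASE 1 above using PyYAML (assuming the configuration is a plain YAML
# document; validation is left to the downstream `prepare_*` functions):
import yaml


def _read_yaml_config(config_file: Union[str, Path]) -> Dict[str, Any]:
    with Path(config_file).open("r") as f:
        return yaml.safe_load(f)
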
def dataset_reader(data_file: Union[str, Path]) -> pd.DataFrame:
    """
    Read aggregate acoustic backscatter measurements
    """
    # == functions/get_acoustic_data.m
    # == functions/load_MOCNESS_data.m
    # == functions/load_BIOMAPPER_data.m

    # PHASE 1) READ IN FILES

    # RETURNS: Raw pd.DataFrame (or Dict[str, Any]: see `prepare_dataset`)
    pass
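
# A minimal sketch of PHASE 1 above, assuming the aggregate backscatter measurements arrive as a
# single-sheet *.xlsx file (an Excel engine such as openpyxl must be installed):
def _read_dataset(data_file: Union[str, Path]) -> pd.DataFrame:
    return pd.read_excel(data_file)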
@@ -0,0 +1,43 @@

from typing import Any, Callable, Dict, Literal, Union

import numpy as np
from numpy.typing import NDArray
import pandas as pd

def normalize_scattering_model_parameters(
    scattering_model_parameters: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Normalize the scattering model parameters
    """
    # == model_para_conversion.m
    pass

def Sv_prediction_error(
    measured_Sv: NDArray[np.float64],
    predicted_Sv: NDArray[np.float64],
):
    """
    Compute inverted volumetric backscattering strength ($S_v$) prediction error
    """
    pass

def invert_population(
    measured_Sv: NDArray[np.float64],
    predicted_Sv: NDArray[np.float64],
    inverted_ts: NDArray[np.float64],
    **kwargs,  # other parameters
) -> NDArray[np.float64]:  # or just a full DataFrame given the multiple estimates being calculated
    """
    Generate population estimates based on inverted TS model parameters
    """

    # PHASE 1) MEAN NUMBER DENSITY
    # PHASE 2) AREAL NUMBER DENSITY
    # PHASE 3) ABUNDANCE
    # PHASE 4) ANIMAL BODY DENSITY (g/cm^3)
    # PHASE 5) BIOMASS
    # PHASE 6) AREAL BIOMASS DENSITY
    # PHASE 7) COMPUTE TOTAL PREDICTION ERROR ("Qe")
    total_error = Sv_prediction_error(measured_Sv, predicted_Sv)

    # RETURNS: Array or DataFrame of population estimates
    pass
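
# A small worked sketch of PHASES 1-3 above using the standard echo-integration relations
# (sv = 10^(Sv/10), sigma_bs = 10^(TS/10)). The argument names and the scalar layer thickness /
# area inputs are assumptions, not part of this PR.
def _number_density_sketch(measured_Sv_dB, inverted_ts_dB, layer_thickness_m, area_m2):
    sv = 10.0 ** (np.asarray(measured_Sv_dB) / 10.0)        # linear volume backscattering coefficient
    sigma_bs = 10.0 ** (np.asarray(inverted_ts_dB) / 10.0)  # linear backscattering cross-section
    volumetric_density = sv / sigma_bs                      # animals per m^3 (PHASE 1)
    areal_density = volumetric_density * layer_thickness_m  # animals per m^2 (PHASE 2)
    abundance = areal_density * area_m2                     # total animals (PHASE 3)
    return volumetric_density, areal_density, abundance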
@@ -0,0 +1,120 @@

"""
Mathematical and numerical utility functions.
"""

from scipy.special import spherical_jn, spherical_yn
import numpy as np
from typing import Any, Dict, Literal
from numpy.typing import ArrayLike, NDArray

def spherical_hn(n, z, derivative=False) -> ArrayLike:
    r"""
    Spherical Bessel function of the third kind (Hankel function) or its derivative

    Defined as [1]_,

    .. math:: h_n^{(1)}(z) = j_n(z) + i n_n(z),

    where :math:`h_n^{(1)}` is the spherical Bessel function of the third kind (or Hankel function
    of the first kind), :math:`j_n` is the spherical Bessel function of the first kind, :math:`n_n`
    is the spherical Bessel function of the second kind (or Neumann function), :math:`n` is the
    order of the function (:math:`n \geq 0`), :math:`z` is the Bessel function argument value, and
    :math:`i` is the imaginary unit.

    Parameters
    ----------
    n: int
        Order of the Bessel function (n >= 0)
    z: Union[float, complex]
        Argument of the Bessel function
    derivative: Optional[bool]
        When True, the derivative is computed

    Notes
    -----
    The derivative is computed using the relation [2]_,

    .. math::
        h_n^{(1)\prime}(z) = \frac{n}{z} h_n^{(1)}(z) - h_{n+1}^{(1)}(z)

    References
    ----------
    .. [1] https://dlmf.nist.gov/10.47#E5
    .. [2] https://dlmf.nist.gov/10.51#E2
    """
    # == lib/sphhn.m

    # Define internal function
    def _spherical_hn(n, z):
        return spherical_jn(n, z) + 1j * spherical_yn(n, z)

    # Computing derivative
    if derivative:
        return (n / z) * _spherical_hn(n, z) - _spherical_hn(n + 1, z)
    else:
        return _spherical_hn(n, z)

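# Quick usage example / sanity check for `spherical_hn` (not part of the PR): the n = 0 case has
# the closed form h_0^{(1)}(z) = -i * exp(i z) / z.
def _check_spherical_hn(z: float = 2.0) -> bool:
    return bool(np.isclose(spherical_hn(0, z), -1j * np.exp(1j * z) / z))
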
def length_average(
    length: NDArray[np.float64],
    form_function: NDArray[np.complex128],
    distribution_kwargs: Dict[str, float],
    distribution: Literal["gaussian", "uniform"] = "gaussian",
) -> ArrayLike:
    r"""
    Compute the length-averaged linear backscattering cross-section (:math:`\sigma_{bs}(L)`)
    """
    # == Scat_models/length_ave.m

    # PHASE 1) EXTRACT RELEVANT PARAMETERS (e.g. ka)
    # PHASE 2) GENERATE PDF BASED ON SELECTED DISTRIBUTION
    if distribution == "gaussian":
        pass
    elif distribution == "uniform":
        pass
    else:
        raise ValueError("Invalid distribution type. Choose 'gaussian' or 'uniform'.")
    # PHASE 3) SQUARE SIGMA_BS
    # PHASE 4) COMPUTE SIGMA_BS OVER CONFIGURED PDF BINS AT EACH DEFINED FREQUENCY

    # RETURNS: sqrt(sum(sigma_bs))
    pass

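# A minimal sketch of the Gaussian branch of `length_average` above (the uniform branch and
# `orientation_average` follow the same pattern). The keyword names "mean" and
# "standard_deviation" are assumptions about `distribution_kwargs`, not part of this PR.
from scipy.stats import norm


def _gaussian_length_average(length, form_function, mean, standard_deviation):
    # PDF weights over the discretized length bins, normalized to sum to one
    weights = norm.pdf(length, loc=mean, scale=standard_deviation)
    weights = weights / weights.sum()
    # |f|^2 is proportional to sigma_bs; average over the PDF, then take the square root
    sigma_bs = np.abs(np.asarray(form_function)) ** 2
    return np.sqrt(np.sum(weights * sigma_bs, axis=-1))
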
def orientation_average(
    angle: NDArray[np.float64],
    form_function: NDArray[np.complex128],
    distribution_kwargs: Dict[str, float],
    distribution: Literal["gaussian", "uniform"] = "gaussian",
) -> ArrayLike:
    r"""
    Compute the orientation-averaged linear backscattering cross-section
    (:math:`\sigma_{bs}(\theta)`)
    """
    # == Scat_models/orient_ave.m

    # PHASE 1) EXTRACT RELEVANT PARAMETERS (e.g. ka)
    # PHASE 2) GENERATE PDF BASED ON SELECTED DISTRIBUTION
    if distribution == "gaussian":
        pass
    elif distribution == "uniform":
        pass
    else:
        raise ValueError("Invalid distribution type. Choose 'gaussian' or 'uniform'.")
    # PHASE 3) SQUARE SIGMA_BS
    # PHASE 4) COMPUTE SIGMA_BS OVER CONFIGURED PDF BINS AT EACH DEFINED FREQUENCY

    # RETURNS: sqrt(sum(sigma_bs))
    pass

def fit_rayleigh_pdf(
    measured: NDArray[np.float64],
    density: NDArray[np.float64],
    mean: float,
    standard_deviation: float,
    lower_bounds: float,
    upper_bounds: float,
    arg_distribution: Literal["exponential", "gaussian"] = "gaussian",
):
    """
    Fit a single-parameter Rayleigh probability density function to the measured data
    """
    pass
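
# A minimal sketch, assuming scipy's scale-only Rayleigh fit is an acceptable stand-in for the
# single-parameter fit described above:
from scipy import stats


def _fit_rayleigh_scale(measured) -> float:
    # fix the location at zero so that only the Rayleigh scale parameter is estimated
    _, scale = stats.rayleigh.fit(measured, floc=0)
    return scale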
@@ -0,0 +1,75 @@

import numpy as np
from lmfit import Minimizer, Parameters
from typing import Any, Callable, Dict, Literal, Union
from numpy.typing import NDArray

[Review comment] should this be …

def mae(
    prediction: NDArray[np.float64],
    measurement: NDArray[np.float64],
):
    """
    Mean absolute deviation (MAD) in logarithmic space (dB)
    """
    # == functions/cost_functionALL.m
    pass

def rmse(
    prediction: NDArray[np.float64],
    measurement: NDArray[np.float64],
):
    """
    Root mean square error (RMSE) in logarithmic space (dB)
    """
    # == functions/cost_functionALL.m
    pass

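# Minimal sketches of the two cost functions above, assuming both inputs are already in dB
# (logarithmic space) and aligned element-wise:
def _mae_dB(prediction, measurement) -> float:
    # mean absolute deviation between predicted and measured Sv (dB)
    return float(np.mean(np.abs(np.asarray(prediction) - np.asarray(measurement))))


def _rmse_dB(prediction, measurement) -> float:
    # root mean square error between predicted and measured Sv (dB)
    return float(np.sqrt(np.mean((np.asarray(prediction) - np.asarray(measurement)) ** 2)))
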
def normalize_optimization_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize the optimization parameters
    """
    pass

def prepare_optimization(
    scattering_model_parameters: Dict[str, Any],
    optimization_settings: Dict[str, Any],
    cost_function: Callable = mae,
) -> Dict[str, Union[Minimizer, Parameters]]:
    """
    Prepare optimization settings
    """

    # PHASE 1) EXTRACT RELEVANT SCATTERING MODEL PARAMETERS
    # PHASE 2) CONVERT RELEVANT OPTIMIZATION PARAMETERS INTO ASSOCIATED `lmfit::Parameters`
    params = Parameters(**scattering_model_parameters)  # not actual code, just a placeholder
    # PHASE 3) WITH COST-FUNCTION, CREATE `lmfit::Minimizer` OBJECT
    # not actual code, just a placeholder
    minim = Minimizer(cost_function, params, **optimization_settings)
    # RETURNS: Dictionary with optimization parameters and minimizer
    return {"parameters": params, "minimizer": minim}

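# A minimal sketch of how the `Parameters(...)` placeholder above could be made concrete with the
# actual lmfit API. The assumed structure of each entry in `scattering_model_parameters`
# ({"value": ..., "min": ..., "max": ..., "vary": ...}) is hypothetical.
def _build_lmfit_parameters(scattering_model_parameters: Dict[str, Any]) -> Parameters:
    params = Parameters()
    for name, spec in scattering_model_parameters.items():
        params.add(
            name,
            value=spec.get("value"),
            min=spec.get("min", -np.inf),
            max=spec.get("max", np.inf),
            vary=spec.get("vary", True),
        )
    return params
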
[Review comment] This should be renamed to something like …

def optimize_scattering_model(
    predicted_Sv: NDArray[np.float64],
    measured_Sv: NDArray[np.float64],
    parameters: Parameters,
    cost_function: Minimizer,
    optimization_settings: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Optimize scattering model parameters
    """
    # == functions/SVpredictionALL.m
    # == KrillSvInversion_simu_data_2020_05_01.m

    # PHASE 1) RUN OPTIMIZATION
    # not actual code, just a placeholder
    parameters_optimized = cost_function.minimize(
        method="least_squares",
        **optimization_settings["config"]
    )
    # PHASE 2) CALCULATE MEAN ABSOLUTE DEVIATION
    mad_optimized = np.mean(np.abs(parameters_optimized.residual))
    # PHASE 3) EXTRACT THE BEST-FIT PARAMETERS
    best_fit_params = parameters_optimized.params.valuesdict()

    # RETURNS: Best-fit scattering model parameters
    return best_fit_params
@@ -0,0 +1,44 @@

from pathlib import Path
import pandas as pd
from typing import Any, Dict, Union
from ...survey import Survey

[Review comment] The way things are designed now is completely separate from the hake component,
which is not what I envisioned. I think it is fine as an intermediate step, but ultimately a more
modularized framework will be needed to make the package more useful as an open-source project,
and also to remove the need for most "patcher"-type functions. Let's talk more about this when we
meet.

def inversion_pipeline(
    acoustic_dataset_file: Union[str, Path],
    scattering_config_file: Union[str, Path],
    inversion_config_file: Union[str, Path],
) -> Dict[str, Any]:
    """
    Consolidated workflow for predicting volumetric backscatter using inverted scattering model
    parameters/inputs
    """

    # PHASE 1) READ IN DATASET FILE
    # PHASE 2) READ IN CONFIGURATION FILES
    # PHASE 3) PREPARE DATASET FOR INVERSION
    # PHASE 4) PREPARE CONFIGURATION SETTINGS FOR OPTIMIZATION CALCULATIONS
    # PHASE 5) PREPARE SCATTERING MODEL AND OBJECT
    # PHASE 6) INVERT SCATTERING MODEL
    # PHASE 7) CALCULATE POPULATION ESTIMATES

    # RETURNS: A dictionary of grouped DataFrame objects (or new columns appended to the acoustic
    # dataset?) that incorporate the population estimates derived from the inverted scattering
    # model. One key in this output would also hold the simulation results from the optimization
    # for user scrutiny
    pass

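# A minimal sketch of how the phases above could be wired together once the reader, preparation,
# and optimization stubs sketched elsewhere in this PR are filled in. The cross-module imports are
# omitted and the exact call signatures are assumptions at this stage.
def _inversion_pipeline_sketch(acoustic_dataset_file, scattering_config_file, inversion_config_file):
    # PHASES 1-2) read the dataset and both configuration files
    dataset = dataset_reader(acoustic_dataset_file)
    scattering_config = yaml_configuration_reader(scattering_config_file)
    inversion_config = yaml_configuration_reader(inversion_config_file)
    # PHASES 3-5) validate and prepare the dataset, inversion settings, and scattering model inputs
    prepared_data = prepare_dataset(dataset)
    inversion_settings = prepare_inversion_settings(inversion_config)
    scattering_inputs = prepare_scattering_model_inputs(scattering_config)
    # PHASE 6) invert the scattering model via the optimization layer
    optimization = prepare_optimization(scattering_inputs, inversion_settings)
    # PHASE 7) population estimates from the inverted parameters (see `invert_population`)
    return {"data": prepared_data, "optimization": optimization}
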
def inversion_survey_patch(
    self: Survey,
    acoustic_dataset_file: Union[str, Path],
    scattering_config_file: Union[str, Path],
    inversion_config_file: Union[str, Path],
) -> Dict[str, Any]:
    """
    Patching method to add the `inversion_pipeline` function as a method to the base
    `echopop::Survey` class
    """

    # NOTE: This would be patched using the import functions defined in
    # `extentions/survey_extentions.py`

    return inversion_pipeline(acoustic_dataset_file, scattering_config_file, inversion_config_file)
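
# A minimal sketch of how the patch could be applied, assuming a simple monkey-patching approach
# (the actual extension mechanism referenced in the NOTE above lives elsewhere):
def _patch_inversion_onto_survey(survey_cls=Survey, method_name: str = "invert"):
    # attach the pipeline wrapper as a method on the Survey class
    setattr(survey_cls, method_name, inversion_survey_patch)
    return survey_cls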

[Review comment] I think this can go into optimize.py since the output of the optimizer is doing
the "inversion", so it can just be packaged there.