Krill inversion component sketch #321
base: main
@@ -0,0 +1,98 @@

from pydantic import BaseModel
from pandera import DataFrameModel
from lmfit import Parameters
from typing import Any, Dict, List, Union

import pandas as pd
from pathlib import Path


####################################################################################################
# Validation / preparation
####################################################################################################
class inversion_configuration_validator(BaseModel):
    """
    Pydantic model for validating configuration parameters
    """

    # RETURNS: Dict[str, Any]
    pass


class dataset_validator(DataFrameModel):
    """
    Pandera model for validating dataset values
    """

    # RETURNS: pd.DataFrame
    pass

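# A minimal sketch of what the two validators above might eventually look like. The field and
# column names below are hypothetical placeholders, not part of this PR.
import pandera as pa
from pandera.typing import Series


class _ExampleInversionConfiguration(BaseModel):
    # hypothetical configuration fields
    sound_speed: float
    frequency_kHz: List[float]


class _ExampleDatasetSchema(pa.DataFrameModel):
    # hypothetical dataset columns
    frequency: Series[float] = pa.Field(gt=0)
    Sv_mean: Series[float]
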
def prepare_scattering_model_inputs(scattering_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare scattering model parameter inputs
    """
    # == functions/set_para.m
    # == functions/inversion_para.m

    # PHASE 1) INGEST VALUES FROM CONFIGURATION FILE
    # PHASE 2) VALIDATE USING `inversion_configuration_validator`
    # PHASE 3) COMPUTE INTERMEDIATE VARIABLES (e.g. acoustic wavenumber, position matrix)
    # PHASE 4) PASS TO SCATTERER CLASS
    # --> EXTERNAL TO THIS FUNCTION

    # RETURNS: Validated scattering model inputs
    pass

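# A small worked sketch of one of the PHASE 3 intermediate variables named above: the acoustic
# wavenumber k = 2 * pi * f / c. The argument names and default sound speed are assumptions, not
# part of this PR.
import numpy as np


def _acoustic_wavenumber(frequency_Hz, sound_speed_m_s=1500.0):
    # k = 2*pi*f / c, broadcast over an array of frequencies
    return 2.0 * np.pi * np.asarray(frequency_Hz) / sound_speed_m_s
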
def prepare_dataset(dataset: pd.DataFrame) -> Dict[str, Any]:
    """
    Prepare dataset inputs
    """

    # PHASE 1) INGEST DATASET (*.xlsx)
    # PHASE 2) VALIDATE USING `dataset_validator`
    # PHASE 3) PARTITION DATASET BASED ON DIFFERENT ECHOMETRICS (e.g. mean Sv, median Sv)

    # RETURNS: Validated dataset DataFrame objects used for inversion
    pass

def prepare_inversion_settings(inversion_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare inversion configuration and parameterization
    """

    # PHASE 1) INGEST VALUES FROM CONFIGURATION FILE
    # PHASE 2) VALIDATE USING `inversion_configuration_validator`
    # PHASE 3) COMPUTE INTERMEDIATE VARIABLES (e.g. acoustic wavenumber, position matrix)

    # RETURNS: Validated inversion and optimization parameters
    pass


####################################################################################################
# Data ingestion
####################################################################################################
def yaml_configuration_reader(
    config_file: Union[str, Path],
) -> Dict[str, Union[float, int, Parameters, pd.DataFrame, str]]:
    """
    Read and validate the input parameterization YAML configuration
    """
    # == functions/load_para_data.m
    # == functions/load_geo_phy_para.m
    # == functions/get_simu_para.m

    # PHASE 1) READ CONFIGURATION FILE

    # RETURNS: Raw Dict[str, Any]
    pass

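# A minimal sketch of PHASE 1 above using PyYAML (assuming the configuration is a plain YAML
# document; validation is left to the downstream `prepare_*` functions):
import yaml


def _read_yaml_config(config_file: Union[str, Path]) -> Dict[str, Any]:
    with Path(config_file).open("r") as f:
        return yaml.safe_load(f)
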
def dataset_reader(data_file: Union[str, Path]) -> pd.DataFrame:
    """
    Read aggregate acoustic backscatter measurements
    """
    # == functions/get_acoustic_data.m
    # == functions/load_MOCNESS_data.m
    # == functions/load_BIOMAPPER_data.m

    # PHASE 1) READ IN FILES

    # RETURNS: Raw pd.DataFrame (or Dict[str, Any]: see `prepare_dataset`)
    pass
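
# A minimal sketch of PHASE 1 above, assuming the aggregate backscatter measurements arrive as a
# single-sheet *.xlsx file (an Excel engine such as openpyxl must be installed):
def _read_dataset(data_file: Union[str, Path]) -> pd.DataFrame:
    return pd.read_excel(data_file)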
@@ -0,0 +1,43 @@

from typing import Any, Callable, Dict, Literal, Union

import numpy as np
from numpy.typing import NDArray
import pandas as pd

def normalize_scattering_model_parameters(
    scattering_model_parameters: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Normalize the scattering model parameters
    """
    # == model_para_conversion.m
    pass

def Sv_prediction_error(
    measured_Sv: NDArray[np.float64],
    predicted_Sv: NDArray[np.float64],
):
    """
    Compute inverted volumetric backscattering strength ($S_v$) prediction error
    """
    pass

def invert_population(
    measured_Sv: NDArray[np.float64],
    predicted_Sv: NDArray[np.float64],
    inverted_ts: NDArray[np.float64],
    **kwargs,  # other parameters
) -> NDArray[np.float64]:  # or just a full DataFrame given the multiple estimates being calculated
    """
    Generate population estimates based on inverted TS model parameters
    """

    # PHASE 1) MEAN NUMBER DENSITY
    # PHASE 2) AREAL NUMBER DENSITY
    # PHASE 3) ABUNDANCE
    # PHASE 4) ANIMAL BODY DENSITY (g/cm^3)
    # PHASE 5) BIOMASS
    # PHASE 6) AREAL BIOMASS DENSITY
    # PHASE 7) COMPUTE TOTAL PREDICTION ERROR ("Qe")
    total_error = Sv_prediction_error(measured_Sv, predicted_Sv)

    # RETURNS: Array or DataFrame of population estimates
    pass
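
# A small worked sketch of PHASES 1-3 above using the standard echo-integration relations
# (sv = 10^(Sv/10), sigma_bs = 10^(TS/10)). The argument names and the scalar layer thickness /
# area inputs are assumptions, not part of this PR.
def _number_density_sketch(measured_Sv_dB, inverted_ts_dB, layer_thickness_m, area_m2):
    sv = 10.0 ** (np.asarray(measured_Sv_dB) / 10.0)        # linear volume backscattering coefficient
    sigma_bs = 10.0 ** (np.asarray(inverted_ts_dB) / 10.0)  # linear backscattering cross-section
    volumetric_density = sv / sigma_bs                      # animals per m^3 (PHASE 1)
    areal_density = volumetric_density * layer_thickness_m  # animals per m^2 (PHASE 2)
    abundance = areal_density * area_m2                     # total animals (PHASE 3)
    return volumetric_density, areal_density, abundance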
@@ -0,0 +1,120 @@

"""
Mathematical and numerical utility functions.
"""

from scipy.special import spherical_jn, spherical_yn
import numpy as np
from typing import Any, Dict, Literal
from numpy.typing import ArrayLike, NDArray

def spherical_hn(n, z, derivative=False) -> ArrayLike:
    r"""
    Spherical Bessel function of the third kind (Hankel function) or its derivative

    Defined as [1]_,

    .. math:: h_n^{(1)}(z) = j_n(z) + i n_n(z),

    where :math:`h_n^{(1)}` is the spherical Bessel function of the third kind (or Hankel function
    of the first kind), :math:`j_n` is the spherical Bessel function of the first kind, :math:`n_n`
    is the spherical Bessel function of the second kind (or Neumann function), :math:`n` is the
    order of the function (:math:`n \geq 0`), :math:`z` is the Bessel function argument value, and
    :math:`i` is the imaginary unit.

    Parameters
    ----------
    n: int
        Order of the Bessel function (n >= 0)
    z: Union[float, complex]
        Argument of the Bessel function
    derivative: Optional[bool]
        When True, the derivative is computed

    Notes
    -----
    The derivative is computed using the relation [2]_,

    .. math::
        h_n^{(1)\prime}(z) = \frac{n}{z} h_n^{(1)}(z) - h_{n+1}^{(1)}(z)

    References
    ----------
    .. [1] https://dlmf.nist.gov/10.47#E5
    .. [2] https://dlmf.nist.gov/10.51#E2
    """
    # == lib/sphhn.m

    # Define internal function
    def _spherical_hn(n, z):
        return spherical_jn(n, z) + 1j * spherical_yn(n, z)

    # Computing derivative
    if derivative:
        return (n / z) * _spherical_hn(n, z) - _spherical_hn(n + 1, z)
    else:
        return _spherical_hn(n, z)

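# Quick usage example / sanity check for `spherical_hn` (not part of the PR): the n = 0 case has
# the closed form h_0^{(1)}(z) = -i * exp(i z) / z.
def _check_spherical_hn(z: float = 2.0) -> bool:
    return bool(np.isclose(spherical_hn(0, z), -1j * np.exp(1j * z) / z))
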
def length_average(
    length: NDArray[np.float64],
    form_function: NDArray[np.complex128],
    distribution_kwargs: Dict[str, float],
    distribution: Literal["gaussian", "uniform"] = "gaussian",
) -> ArrayLike:
    r"""
    Compute the length-averaged linear backscattering cross-section (:math:`\sigma_{bs}(L)`)
    """
    # == Scat_models/length_ave.m

    # PHASE 1) EXTRACT RELEVANT PARAMETERS (e.g. ka)
    # PHASE 2) GENERATE PDF BASED ON SELECTED DISTRIBUTION
    if distribution == "gaussian":
        pass
    elif distribution == "uniform":
        pass
    else:
        raise ValueError("Invalid distribution type. Choose 'gaussian' or 'uniform'.")
    # PHASE 3) SQUARE SIGMA_BS
    # PHASE 4) COMPUTE SIGMA_BS OVER CONFIGURED PDF BINS AT EACH DEFINED FREQUENCY

    # RETURNS: sqrt(sum(sigma_bs))
    pass

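# A minimal sketch of the Gaussian branch of `length_average` above (the uniform branch and
# `orientation_average` follow the same pattern). The keyword names "mean" and
# "standard_deviation" are assumptions about `distribution_kwargs`, not part of this PR.
from scipy.stats import norm


def _gaussian_length_average(length, form_function, mean, standard_deviation):
    # PDF weights over the discretized length bins, normalized to sum to one
    weights = norm.pdf(length, loc=mean, scale=standard_deviation)
    weights = weights / weights.sum()
    # |f|^2 is proportional to sigma_bs; average over the PDF, then take the square root
    sigma_bs = np.abs(np.asarray(form_function)) ** 2
    return np.sqrt(np.sum(weights * sigma_bs, axis=-1))
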
def orientation_average(
    angle: NDArray[np.float64],
    form_function: NDArray[np.complex128],
    distribution_kwargs: Dict[str, float],
    distribution: Literal["gaussian", "uniform"] = "gaussian",
) -> ArrayLike:
    r"""
    Compute the orientation-averaged linear backscattering cross-section
    (:math:`\sigma_{bs}(\theta)`)
    """
    # == Scat_models/orient_ave.m

    # PHASE 1) EXTRACT RELEVANT PARAMETERS (e.g. ka)
    # PHASE 2) GENERATE PDF BASED ON SELECTED DISTRIBUTION
    if distribution == "gaussian":
        pass
    elif distribution == "uniform":
        pass
    else:
        raise ValueError("Invalid distribution type. Choose 'gaussian' or 'uniform'.")
    # PHASE 3) SQUARE SIGMA_BS
    # PHASE 4) COMPUTE SIGMA_BS OVER CONFIGURED PDF BINS AT EACH DEFINED FREQUENCY

    # RETURNS: sqrt(sum(sigma_bs))
    pass

def fit_rayleigh_pdf(
    measured: NDArray[np.float64],
    density: NDArray[np.float64],
    mean: float,
    standard_deviation: float,
    lower_bounds: float,
    upper_bounds: float,
    arg_distribution: Literal["exponential", "gaussian"] = "gaussian",
):
    """
    Fit a single-parameter Rayleigh probability density function to the measured data
    """
    pass
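
# A minimal sketch, assuming scipy's scale-only Rayleigh fit is an acceptable stand-in for the
# single-parameter fit described above:
from scipy import stats


def _fit_rayleigh_scale(measured) -> float:
    # fix the location at zero so that only the Rayleigh scale parameter is estimated
    _, scale = stats.rayleigh.fit(measured, floc=0)
    return scale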
@@ -0,0 +1,75 @@

import numpy as np
from lmfit import Minimizer, Parameters
from typing import Any, Callable, Dict, Literal, Union
from numpy.typing import NDArray

[Review comment] should this be …

def mae(
    prediction: NDArray[np.float64],
    measurement: NDArray[np.float64],
):
    """
    Mean absolute deviation (MAD) in logarithmic space (dB)
    """
    # == functions/cost_functionALL.m
    pass

def rmse(
    prediction: NDArray[np.float64],
    measurement: NDArray[np.float64],
):
    """
    Root mean square error (RMSE) in logarithmic space (dB)
    """
    # == functions/cost_functionALL.m
    pass

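# Minimal sketches of the two cost functions above, assuming both inputs are already in dB
# (logarithmic space) and aligned element-wise:
def _mae_dB(prediction, measurement) -> float:
    # mean absolute deviation between predicted and measured Sv (dB)
    return float(np.mean(np.abs(np.asarray(prediction) - np.asarray(measurement))))


def _rmse_dB(prediction, measurement) -> float:
    # root mean square error between predicted and measured Sv (dB)
    return float(np.sqrt(np.mean((np.asarray(prediction) - np.asarray(measurement)) ** 2)))
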
def normalize_optimization_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize the optimization parameters
    """
    pass

def prepare_optimization(
    scattering_model_parameters: Dict[str, Any],
    optimization_settings: Dict[str, Any],
    cost_function: Callable = mae,
) -> Dict[str, Union[Minimizer, Parameters]]:
    """
    Prepare optimization settings
    """

    # PHASE 1) EXTRACT RELEVANT SCATTERING MODEL PARAMETERS
    # PHASE 2) CONVERT RELEVANT OPTIMIZATION PARAMETERS INTO ASSOCIATED `lmfit::Parameters`
    params = Parameters(**scattering_model_parameters)  # not actual code, just a placeholder
    # PHASE 3) WITH COST-FUNCTION, CREATE `lmfit::Minimizer` OBJECT
    # not actual code, just a placeholder
    minim = Minimizer(cost_function, params, **optimization_settings)
    # RETURNS: Dictionary with optimization parameters and minimizer
    return {"parameters": params, "minimizer": minim}

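# A minimal sketch of how the `Parameters(...)` placeholder above could be made concrete with the
# actual lmfit API. The assumed structure of each entry in `scattering_model_parameters`
# ({"value": ..., "min": ..., "max": ..., "vary": ...}) is hypothetical.
def _build_lmfit_parameters(scattering_model_parameters: Dict[str, Any]) -> Parameters:
    params = Parameters()
    for name, spec in scattering_model_parameters.items():
        params.add(
            name,
            value=spec.get("value"),
            min=spec.get("min", -np.inf),
            max=spec.get("max", np.inf),
            vary=spec.get("vary", True),
        )
    return params
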
[Review comment] This should be renamed to something like …

def optimize_scattering_model(
    predicted_Sv: NDArray[np.float64],
    measured_Sv: NDArray[np.float64],
    parameters: Parameters,
    cost_function: Minimizer,
    optimization_settings: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Optimize scattering model parameters
    """
    # == functions/SVpredictionALL.m
    # == KrillSvInversion_simu_data_2020_05_01.m

    # PHASE 1) RUN OPTIMIZATION
    # not actual code, just a placeholder
    parameters_optimized = cost_function.minimize(
        method="least_squares",
        **optimization_settings["config"]
    )
    # PHASE 2) CALCULATE MEAN ABSOLUTE DEVIATION
    mad_optimized = np.mean(np.abs(parameters_optimized.residual))
    # PHASE 3) EXTRACT THE BEST-FIT PARAMETERS
    best_fit_params = parameters_optimized.params.valuesdict()

    # RETURNS: Best-fit scattering model parameters
    return best_fit_params
@@ -0,0 +1,44 @@

from pathlib import Path
import pandas as pd
from typing import Any, Dict, Union
from ...survey import Survey

[Review comment] The way things are designed now is completely separate from the hake component,
which is not what I envisioned. I think it is fine as an intermediate step, but ultimately a more
modularized framework will be needed to make the package more useful as an open-source project,
and also to remove the need for most "patcher"-type functions. Let's talk more about this when we
meet.

def inversion_pipeline(
    acoustic_dataset_file: Union[str, Path],
    scattering_config_file: Union[str, Path],
    inversion_config_file: Union[str, Path],
) -> Dict[str, Any]:
    """
    Consolidated workflow for predicting volumetric backscatter using inverted scattering model
    parameters/inputs
    """

    # PHASE 1) READ IN DATASET FILE
    # PHASE 2) READ IN CONFIGURATION FILES
    # PHASE 3) PREPARE DATASET FOR INVERSION
    # PHASE 4) PREPARE CONFIGURATION SETTINGS FOR OPTIMIZATION CALCULATIONS
    # PHASE 5) PREPARE SCATTERING MODEL AND OBJECT
    # PHASE 6) INVERT SCATTERING MODEL
    # PHASE 7) CALCULATE POPULATION ESTIMATES

    # RETURNS: A dictionary of grouped DataFrame objects (or new columns appended to the acoustic
    # dataset?) that incorporate the population estimates derived from the inverted scattering
    # model. One key in this output would also hold the simulation results from the optimization
    # for user scrutiny
    pass

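# A minimal sketch of how the phases above could be wired together once the reader, preparation,
# and optimization stubs sketched elsewhere in this PR are filled in. The cross-module imports are
# omitted and the exact call signatures are assumptions at this stage.
def _inversion_pipeline_sketch(acoustic_dataset_file, scattering_config_file, inversion_config_file):
    # PHASES 1-2) read the dataset and both configuration files
    dataset = dataset_reader(acoustic_dataset_file)
    scattering_config = yaml_configuration_reader(scattering_config_file)
    inversion_config = yaml_configuration_reader(inversion_config_file)
    # PHASES 3-5) validate and prepare the dataset, inversion settings, and scattering model inputs
    prepared_data = prepare_dataset(dataset)
    inversion_settings = prepare_inversion_settings(inversion_config)
    scattering_inputs = prepare_scattering_model_inputs(scattering_config)
    # PHASE 6) invert the scattering model via the optimization layer
    optimization = prepare_optimization(scattering_inputs, inversion_settings)
    # PHASE 7) population estimates from the inverted parameters (see `invert_population`)
    return {"data": prepared_data, "optimization": optimization}
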
def inversion_survey_patch(
    self: Survey,
    acoustic_dataset_file: Union[str, Path],
    scattering_config_file: Union[str, Path],
    inversion_config_file: Union[str, Path],
) -> Dict[str, Any]:
    """
    Patching method to add the `inversion_pipeline` function as a method to the base
    `echopop::Survey` class
    """

    # NOTE: This would be patched using the import functions defined in
    # `extentions/survey_extentions.py`

    return inversion_pipeline(acoustic_dataset_file, scattering_config_file, inversion_config_file)
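
# A minimal sketch of how the patch could be applied, assuming a simple monkey-patching approach
# (the actual extension mechanism referenced in the NOTE above lives elsewhere):
def _patch_inversion_onto_survey(survey_cls=Survey, method_name: str = "invert"):
    # attach the pipeline wrapper as a method on the Survey class
    setattr(survey_cls, method_name, inversion_survey_patch)
    return survey_cls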

[Review comment] I think this can go into optimize.py since the output of the optimizer is doing
the "inversion", so it can just be packaged there.