Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add method for computing harmonic mean #66

Merged
merged 11 commits into from
Apr 10, 2023
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dependencies = [
"pandas>=1.5.3",
"pydantic>=1.10.6",
"pluggy>=1.0.0",
"scipy>=1.10.1",
"typer>=0.7.0"
]

Expand Down
19 changes: 6 additions & 13 deletions src/seagap/configs/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, List, Literal, Optional
from uuid import uuid4

from pydantic import BaseModel, Field, PrivateAttr, validator
from pydantic import BaseModel, Field, PrivateAttr

from .io import InputData

Expand Down Expand Up @@ -141,12 +141,12 @@ class Solver(BaseModel):
travel_times_variance: float = Field(
1e-10, description="VARIANCE (s**2) PXP two-way travel time measurement"
)
transponder_wait_time: float = Field(
transducer_delay_time: float = Field(
0.0,
description=(
"Transponder Wait Time - delay at surface transducer (secs.). "
"Options: ship/SV3 = 0.0s, WG = 0.1s"
),
description="Transducer Delay Time - delay at surface transducer (secs). ",
)
harmonic_mean_start_depth: float = Field(
0.0, description="Start depth in meters for harmonic mean computation."
)
input_files: SolverInputs = Field(
..., description="Input files data path specifications."
Expand All @@ -167,10 +167,3 @@ class Solver(BaseModel):
"which data points will be excluded from solution"
),
)

@validator("transponder_wait_time")
def check_transponder_wait_time(cls, v):
"""Validate transponder wait time value"""
if v not in [0.0, 0.1]:
raise ValueError("Transponder wait time must either be 0.0s or 0.1s")
return v
74 changes: 74 additions & 0 deletions src/seagap/harmonic_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import pandas as pd
import scipy.stats


def _compute_hm(svdf: pd.DataFrame, start_depth: float, end_depth: float) -> float:
"""
Computes harmonic mean using `scipy's hmean <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.hmean.html>`_ method.
It takes the sound speed 'sv' as the input array and the depth 'dd' differences as the weights.

Note that this function assumes absolute values for depth array,
sound speed array, start depth, and end depth.

The underlying formula is

H = (w1+...+wn) / ((w1/x1)+...+(wn/xn))

H is the resulting harmonic mean
w is the weight value, in this case, the depth differences
x is the input value, in this case, the sound speed

Parameters
----------
svdf : pd.DataFrame
Sound speed profile data as dataframe with columns 'dd' and 'sv'
start_depth : float
The start depth for calculation
end_depth : float
The end depth for calculation

""" # noqa
for col in ["dd", "sv"]:
if col not in svdf.columns:
raise ValueError(f"{col} column must exist in the input dataframe!")

filtdf = svdf[
(svdf["dd"].round() >= start_depth) & (svdf["dd"].round() <= end_depth)
]

# Get weights
weights = filtdf["dd"].diff()

return scipy.stats.hmean(filtdf["sv"], weights=weights, nan_policy="omit")


def sv_harmonic_mean(svdf: pd.DataFrame, start_depth: float, end_depth: float) -> float:
"""
Computes harmonic mean from a sound profile
containing depth (dd) and sound speed (sv)

Parameters
----------
svdf : pd.DataFrame
Sound speed profile data as dataframe
start_depth : int or float
The start depth for harmonic mean to be computed
end_depth : int or float
The end depth for harmonic mean to be computed

Returns
-------
float
The sound speed harmonic mean value
"""
# Clean up the sound speed value, ensuring that there's no negative value
svdf = svdf[svdf["sv"] > 0].reset_index(drop=True)
# Make all of the values absolute values, so we're only dealing with positives
abs_start = abs(start_depth)
abs_end = abs(end_depth)
abs_sv = abs(svdf)
# Get the index for the start of depth closest to specified start depth
if len(abs_sv) == 0:
raise ValueError("Dataframe is empty! Please check your data inputs.")

return _compute_hm(abs_sv, abs_start, abs_end)
5 changes: 5 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from pathlib import Path

HERE = Path(__file__).parent

TEST_DATA_FOLDER = HERE / "data"
Loading