Skip to content

Commit

Permalink
Merge pull request #155 from glevv/add-new-stats
Browse files Browse the repository at this point in the history
Readme updates
  • Loading branch information
glevv authored Nov 24, 2024
2 parents e34e92d + 30c58b8 commit 68c7c7b
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 101 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

- Collection of measures of central tendency - `obscure_stats/central_tendency`:
* Contraharmonic Mean;
* Grenander's Mode;
* Half-Sample Mode;
* Hodges-Lehmann-Sen Location;
* Midhinge;
Expand Down Expand Up @@ -48,16 +49,19 @@
* Hossain-Adnan Skewness Coefficient;
* Kelly Skewness Coefficient;
* L-Skewness Coefficient;
* Left Quantile Weight;
* Medeen Skewness Coefficient;
* Pearson Median Skewness Coefficient;
* Pearson Mode Skewness Coefficient.
* Pearson Mode Skewness Coefficient;
* Right Quantile Weight.
- Collection of measures of kurtosis - `obscure_stats/kurtosis`:
* Crow-Siddiqui Kurtosis;
* L-Kurtosis;
* Hogg Kurtosis;
* Moors Kurtosis;
* Moors Octile Kurtosis;
* Reza-Ma Kurtosis;
* Schmid-Trede Measure of Peakedness;
* Staudte Kurtosis.
- Collection of measures of association - `obscure_stats/association`:
* Blomqvist's Beta;
Expand Down
4 changes: 0 additions & 4 deletions src/obscure_stats/kurtosis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
crow_siddiqui_kurt,
hogg_kurt,
l_kurt,
left_quantile_weight,
moors_kurt,
moors_octile_kurt,
reza_ma_kurt,
right_quantile_weight,
schmid_trede_peakedness,
staudte_kurt,
)
Expand All @@ -17,11 +15,9 @@
"crow_siddiqui_kurt",
"hogg_kurt",
"l_kurt",
"left_quantile_weight",
"moors_kurt",
"moors_octile_kurt",
"reza_ma_kurt",
"right_quantile_weight",
"schmid_trede_peakedness",
"staudte_kurt",
]
72 changes: 0 additions & 72 deletions src/obscure_stats/kurtosis/kurtosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,75 +232,3 @@ def schmid_trede_peakedness(x: np.ndarray) -> float:
"""
p125, p25, p75, p875 = np.nanquantile(x, [0.125, 0.25, 0.75, 0.875])
return (p875 - p125) / (p75 - p25)


def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float:
    """Calculate left quantile weight (LQW).

    It is based on inter-percentile ranges (uncentered, unscaled) of the
    left tail of the distribution.

    Parameters
    ----------
    x : array_like
        Input array.
    q : float
        Quantile to use for the anchor. Must lie strictly between 0 and 0.5.

    Returns
    -------
    lqw : float
        The value of left quantile weight.

    Raises
    ------
    ValueError
        If ``q`` is not strictly inside (0, 0.5); this includes NaN.

    Notes
    -----
    Quantiles are computed with ``np.nanquantile``, so NaN entries of ``x``
    are ignored.

    References
    ----------
    Brys, G.; Hubert, M.; Struyf, A. (2006).
    Robust measures of tail weight.
    Computational Statistics and Data Analysis 50(3), 733-759.
    """
    min_q, max_q = 0.0, 0.5
    # A chained comparison is False for NaN, so a NaN q is rejected here with
    # the same message as any other out-of-range value instead of failing
    # later inside the quantile computation.
    if not min_q < q < max_q:
        msg = "Parameter q should be in range (0, 0.5)."
        raise ValueError(msg)
    lower_quantile, q025, upper_quantile = np.nanquantile(
        x, [q * 0.5, 0.25, (1 - q) * 0.5]
    )
    return -(upper_quantile + lower_quantile - 2 * q025) / (
        upper_quantile - lower_quantile
    )


def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float:
    """Calculate right quantile weight (RQW).

    It is based on inter-percentile ranges (uncentered, unscaled) of the
    right tail of the distribution.

    Parameters
    ----------
    x : array_like
        Input array.
    q : float
        Quantile to use for the anchor. Must lie strictly between 0.5 and 1.

    Returns
    -------
    rqw : float
        The value of right quantile weight.

    Raises
    ------
    ValueError
        If ``q`` is not strictly inside (0.5, 1.0); this includes NaN.

    Notes
    -----
    Quantiles are computed with ``np.nanquantile``, so NaN entries of ``x``
    are ignored.

    The denominator is the lower quantile minus the upper quantile, so
    whenever the upper quantile exceeds the lower one the result has the
    opposite sign of the numerator.
    NOTE(review): this looks like the negative of RQW as defined in the
    reference below - confirm before changing, callers may rely on the
    current sign.

    References
    ----------
    Brys, G.; Hubert, M.; Struyf, A. (2006).
    Robust measures of tail weight.
    Computational Statistics and Data Analysis 50(3), 733-759.
    """
    min_q, max_q = 0.5, 1.0
    # A chained comparison is False for NaN, so a NaN q is rejected here with
    # the same message as any other out-of-range value instead of failing
    # later inside the quantile computation.
    if not min_q < q < max_q:
        msg = "Parameter q should be in range (0.5, 1.0)."
        raise ValueError(msg)
    lower_quantile, q075, upper_quantile = np.nanquantile(
        x, [1 - q * 0.5, 0.75, (1 + q) * 0.5]
    )
    return (lower_quantile + upper_quantile - 2 * q075) / (
        lower_quantile - upper_quantile
    )
4 changes: 4 additions & 0 deletions src/obscure_stats/skewness/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
hossain_adnan_skew,
kelly_skew,
l_skew,
left_quantile_weight,
medeen_skew,
pearson_median_skew,
pearson_mode_skew,
right_quantile_weight,
wauc_skew_gamma,
)

Expand All @@ -26,8 +28,10 @@
"hossain_adnan_skew",
"kelly_skew",
"l_skew",
"left_quantile_weight",
"medeen_skew",
"pearson_median_skew",
"pearson_mode_skew",
"right_quantile_weight",
"wauc_skew_gamma",
]
72 changes: 72 additions & 0 deletions src/obscure_stats/skewness/skewness.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,3 +400,75 @@ def cumulative_skew(x: np.ndarray) -> float:
d = q - p
w = (2 * r - n) * 3 / n
return np.sum(d * w) / np.sum(d)


def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float:
    """Calculate left quantile weight (LQW).

    It is based on inter-percentile ranges (uncentered, unscaled) of the
    left tail of the distribution.

    Parameters
    ----------
    x : array_like
        Input array.
    q : float
        Quantile to use for the anchor. Must lie strictly between 0 and 0.5.

    Returns
    -------
    lqw : float
        The value of left quantile weight.

    Raises
    ------
    ValueError
        If ``q`` is not strictly inside (0, 0.5); this includes NaN.

    Notes
    -----
    Quantiles are computed with ``np.nanquantile``, so NaN entries of ``x``
    are ignored.

    References
    ----------
    Brys, G.; Hubert, M.; Struyf, A. (2006).
    Robust measures of tail weight.
    Computational Statistics and Data Analysis 50(3), 733-759.
    """
    min_q, max_q = 0.0, 0.5
    # A chained comparison is False for NaN, so a NaN q is rejected here with
    # the same message as any other out-of-range value instead of failing
    # later inside the quantile computation.
    if not min_q < q < max_q:
        msg = "Parameter q should be in range (0, 0.5)."
        raise ValueError(msg)
    lower_quantile, q025, upper_quantile = np.nanquantile(
        x, [q * 0.5, 0.25, (1 - q) * 0.5]
    )
    return -(upper_quantile + lower_quantile - 2 * q025) / (
        upper_quantile - lower_quantile
    )


def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float:
    """Calculate right quantile weight (RQW).

    It is based on inter-percentile ranges (uncentered, unscaled) of the
    right tail of the distribution.

    Parameters
    ----------
    x : array_like
        Input array.
    q : float
        Quantile to use for the anchor. Must lie strictly between 0.5 and 1.

    Returns
    -------
    rqw : float
        The value of right quantile weight.

    Raises
    ------
    ValueError
        If ``q`` is not strictly inside (0.5, 1.0); this includes NaN.

    Notes
    -----
    Quantiles are computed with ``np.nanquantile``, so NaN entries of ``x``
    are ignored.

    The denominator is the lower quantile minus the upper quantile, so
    whenever the upper quantile exceeds the lower one the result has the
    opposite sign of the numerator.
    NOTE(review): this looks like the negative of RQW as defined in the
    reference below - confirm before changing, callers may rely on the
    current sign.

    References
    ----------
    Brys, G.; Hubert, M.; Struyf, A. (2006).
    Robust measures of tail weight.
    Computational Statistics and Data Analysis 50(3), 733-759.
    """
    min_q, max_q = 0.5, 1.0
    # A chained comparison is False for NaN, so a NaN q is rejected here with
    # the same message as any other out-of-range value instead of failing
    # later inside the quantile computation.
    if not min_q < q < max_q:
        msg = "Parameter q should be in range (0.5, 1.0)."
        raise ValueError(msg)
    lower_quantile, q075, upper_quantile = np.nanquantile(
        x, [1 - q * 0.5, 0.75, (1 + q) * 0.5]
    )
    return (lower_quantile + upper_quantile - 2 * q075) / (
        lower_quantile - upper_quantile
    )
4 changes: 2 additions & 2 deletions tests/test_dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def test_mock_aggregation_functions(
def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None:
"""Testing for result correctness."""
rng = np.random.default_rng(seed)
low_disp = np.round(rng.exponential(scale=1, size=99) + 1, 2)
high_disp = np.round(rng.exponential(scale=10, size=99) + 1, 2)
low_disp = np.round(rng.exponential(scale=1, size=100) + 1, 2)
high_disp = np.round(rng.exponential(scale=10, size=100) + 1, 2)
low_disp_res = func(low_disp)
high_disp_res = func(high_disp)
if low_disp_res > high_disp_res:
Expand Down
20 changes: 2 additions & 18 deletions tests/test_kurtosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
crow_siddiqui_kurt,
hogg_kurt,
l_kurt,
left_quantile_weight,
moors_kurt,
moors_octile_kurt,
reza_ma_kurt,
right_quantile_weight,
schmid_trede_peakedness,
staudte_kurt,
)
Expand All @@ -26,11 +24,9 @@
crow_siddiqui_kurt,
hogg_kurt,
l_kurt,
left_quantile_weight,
moors_kurt,
moors_octile_kurt,
reza_ma_kurt,
right_quantile_weight,
schmid_trede_peakedness,
staudte_kurt,
]
Expand All @@ -53,14 +49,10 @@ def test_mock_aggregation_functions(
def test_kurt_sensibility(func: typing.Callable, seed: int) -> None:
"""Testing for result correctness."""
rng = np.random.default_rng(seed)
platy = rng.uniform(size=99)
lepto = rng.laplace(size=99)
platy = rng.uniform(size=100)
lepto = rng.laplace(size=100)
platy_res = func(platy)
lepto_res = func(lepto)
if func.__name__ == "right_quantile_weight":
# ugly but more harmonized this way
platy_res = -platy_res
lepto_res = -lepto_res
if platy_res > lepto_res:
msg = (
f"Kurtosis in the first case should be lower, got {platy_res} > {lepto_res}"
Expand All @@ -76,14 +68,6 @@ def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) ->
raise ValueError(msg)


@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight])
@pytest.mark.parametrize("q", [0.0, 1.0])
def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None:
    """Check that both quantile weight functions reject an out-of-range q.

    Each function is called with ``q`` equal to 0.0 and to 1.0 and is
    expected to raise a ``ValueError`` whose message matches
    "Parameter q should be in range".
    """
    with pytest.raises(ValueError, match="Parameter q should be in range"):
        func(x_array_float, q=q)


@given(
arrays(
dtype=np.float64,
Expand Down
21 changes: 19 additions & 2 deletions tests/test_skewness.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
hossain_adnan_skew,
kelly_skew,
l_skew,
left_quantile_weight,
medeen_skew,
pearson_median_skew,
pearson_mode_skew,
right_quantile_weight,
wauc_skew_gamma,
)

Expand All @@ -35,9 +37,11 @@
hossain_adnan_skew,
kelly_skew,
l_skew,
left_quantile_weight,
medeen_skew,
pearson_median_skew,
pearson_mode_skew,
right_quantile_weight,
wauc_skew_gamma,
]

Expand All @@ -59,10 +63,15 @@ def test_mock_aggregation_functions(
def test_skew_sensibility(func: typing.Callable, seed: int) -> None:
"""Testing for result correctness."""
rng = np.random.default_rng(seed)
no_skew = np.round(rng.normal(size=99), 2)
left_skew = np.round(rng.exponential(size=99) + 1, 2)
# round for the mode estimators to work properly
no_skew = np.round(rng.uniform(size=100), 2)
left_skew = np.round(rng.exponential(size=100) + 1, 2)
no_skew_res = func(no_skew)
left_skew_res = func(left_skew)
if func.__name__ == "right_quantile_weight":
# ugly but more harmonized this way
no_skew_res = -no_skew_res
left_skew_res = -left_skew_res
if no_skew_res > left_skew_res:
msg = (
f"Skewness in the first case should be lower, "
Expand All @@ -88,6 +97,14 @@ def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) ->
raise ValueError(msg)


@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight])
@pytest.mark.parametrize("q", [0.0, 1.0])
def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None:
    """Check that both quantile weight functions reject an out-of-range q.

    Each function is called with ``q`` equal to 0.0 and to 1.0 and is
    expected to raise a ``ValueError`` whose message matches
    "Parameter q should be in range".
    """
    with pytest.raises(ValueError, match="Parameter q should be in range"):
        func(x_array_float, q=q)


@given(
arrays(
dtype=np.float64,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def test_mock_variation_functions(
def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None:
"""Testing for result correctness."""
rng = np.random.default_rng(seed)
low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=99)
high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=99)
low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=100)
high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=100)
low_var_res = func(low_var)
high_var_res = func(high_var)
if low_var_res < high_var_res:
Expand Down

0 comments on commit 68c7c7b

Please sign in to comment.