Merge pull request #155 from glevv/add-new-stats

Readme updates
glevv · Nov 24, 2024 · 68c7c7b · 68c7c7b
2 parents e34e92d + 30c58b8
commit 68c7c7b
Show file tree

Hide file tree

Showing 9 changed files with 106 additions and 101 deletions.
diff --git a/README.md b/README.md
@@ -15,6 +15,7 @@
 
 - Collection of measures of central tendency - `obscure_stats/central_tendency`:
     * Contraharmonic Mean;
+    * Grenander's Mode;
     * Half-Sample Mode;
     * Hodges-Lehmann-Sen Location;
     * Midhinge;
@@ -48,16 +49,19 @@
     * Hossain-Adnan Skewness Coefficient;
     * Kelly Skewness Coefficient;
     * L-Skewness Coefficient;
+    * Left Quantile Weight;
     * Medeen Skewness Coefficient;
     * Pearson Median Skewness Coefficient;
-    * Pearson Mode Skewness Coefficient.
+    * Pearson Mode Skewness Coefficient;
+    * Right Quantile Weight.
 - Collection of measures of kurtosis - `obscure_stats/kurtosis`:
     * Crow-Siddiqui Kurtosis;
     * L-Kurtosis;
     * Hogg Kurtosis;
     * Moors Kurtosis;
     * Moors Octile Kurtosis;
     * Reza-Ma Kurtosis;
+    * Schmid-Trede Measure of Peakedness;
     * Staudte Kurtosis.
 - Collection of measures of association - `obscure_stats/association`:
     * Blomqvist's Beta;

diff --git a/src/obscure_stats/kurtosis/__init__.py b/src/obscure_stats/kurtosis/__init__.py
@@ -4,11 +4,9 @@
     crow_siddiqui_kurt,
     hogg_kurt,
     l_kurt,
-    left_quantile_weight,
     moors_kurt,
     moors_octile_kurt,
     reza_ma_kurt,
-    right_quantile_weight,
     schmid_trede_peakedness,
     staudte_kurt,
 )
@@ -17,11 +15,9 @@
     "crow_siddiqui_kurt",
     "hogg_kurt",
     "l_kurt",
-    "left_quantile_weight",
     "moors_kurt",
     "moors_octile_kurt",
     "reza_ma_kurt",
-    "right_quantile_weight",
     "schmid_trede_peakedness",
     "staudte_kurt",
 ]
diff --git a/src/obscure_stats/kurtosis/kurtosis.py b/src/obscure_stats/kurtosis/kurtosis.py
@@ -232,75 +232,3 @@ def schmid_trede_peakedness(x: np.ndarray) -> float:
     """
     p125, p25, p75, p875 = np.nanquantile(x, [0.125, 0.25, 0.75, 0.875])
     return (p875 - p125) / (p75 - p25)
-
-
-def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float:
-    """Calculate left quantile weight (LQW).
-
-    It is based on inter-percentile ranges (uncentered, unscaled) of the
-    left tail of the distribution.
-
-    Parameters
-    ----------
-    x : array_like
-        Input array.
-    q : float
-        Quantile to use for the anchor.
-
-    Returns
-    -------
-    lqw : float
-        The value of left quantile weight.
-
-    References
-    ----------
-    Brys, G.; Hubert, M.; Struyf, A. (2006).
-    Robust measures of tail weight.
-    Computational Statistics and Data Analysis 50(3), 733-759.
-    """
-    min_q, max_q = 0.0, 0.5
-    if q <= min_q or q >= max_q:
-        msg = "Parameter q should be in range (0, 0.5)."
-        raise ValueError(msg)
-    lower_quantile, q025, upper_quantile = np.nanquantile(
-        x, [q * 0.5, 0.25, (1 - q) * 0.5]
-    )
-    return -(upper_quantile + lower_quantile - 2 * q025) / (
-        upper_quantile - lower_quantile
-    )
-
-
-def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float:
-    """Calculate right quantile weight (RQW).
-
-    It is based on inter-percentile ranges (uncentered, unscaled) of the
-    right tail of the distribution.
-
-    Parameters
-    ----------
-    x : array_like
-        Input array.
-    q : float
-        Quantile to use for the anchor.
-
-    Returns
-    -------
-    rqw : float
-        The value of right quantile weight.
-
-    References
-    ----------
-    Brys, G.; Hubert, M.; Struyf, A. (2006).
-    Robust measures of tail weight.
-    Computational Statistics and Data Analysis 50(3), 733-759.
-    """
-    min_q, max_q = 0.5, 1.0
-    if q <= min_q or q >= max_q:
-        msg = "Parameter q should be in range (0.5, 1.0)."
-        raise ValueError(msg)
-    lower_quantile, q075, upper_quantile = np.nanquantile(
-        x, [1 - q * 0.5, 0.75, (1 + q) * 0.5]
-    )
-    return (lower_quantile + upper_quantile - 2 * q075) / (
-        lower_quantile - upper_quantile
-    )
diff --git a/src/obscure_stats/skewness/__init__.py b/src/obscure_stats/skewness/__init__.py
@@ -10,9 +10,11 @@
     hossain_adnan_skew,
     kelly_skew,
     l_skew,
+    left_quantile_weight,
     medeen_skew,
     pearson_median_skew,
     pearson_mode_skew,
+    right_quantile_weight,
     wauc_skew_gamma,
 )
 
@@ -26,8 +28,10 @@
     "hossain_adnan_skew",
     "kelly_skew",
     "l_skew",
+    "left_quantile_weight",
     "medeen_skew",
     "pearson_median_skew",
     "pearson_mode_skew",
+    "right_quantile_weight",
     "wauc_skew_gamma",
 ]
diff --git a/src/obscure_stats/skewness/skewness.py b/src/obscure_stats/skewness/skewness.py
@@ -400,3 +400,75 @@ def cumulative_skew(x: np.ndarray) -> float:
     d = q - p
     w = (2 * r - n) * 3 / n
     return np.sum(d * w) / np.sum(d)
+
+
+def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float:
+    """Calculate left quantile weight (LQW).
+
+    It is based on inter-percentile ranges (uncentered, unscaled) of the
+    left tail of the distribution.
+
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    q : float
+        Quantile to use for the anchor; must lie in the open interval (0, 0.5).
+
+    Returns
+    -------
+    lqw : float
+        The value of left quantile weight.
+
+    References
+    ----------
+    Brys, G.; Hubert, M.; Struyf, A. (2006).
+    Robust measures of tail weight.
+    Computational Statistics and Data Analysis 50(3), 733-759.
+    """
+    min_q, max_q = 0.0, 0.5
+    if q <= min_q or q >= max_q:
+        msg = "Parameter q should be in range (0, 0.5)."
+        raise ValueError(msg)
+    lower_quantile, q025, upper_quantile = np.nanquantile(
+        x, [q * 0.5, 0.25, (1 - q) * 0.5]
+    )
+    return -(upper_quantile + lower_quantile - 2 * q025) / (
+        upper_quantile - lower_quantile
+    )
+
+
+def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float:
+    """Calculate right quantile weight (RQW).
+
+    It is based on inter-percentile ranges (uncentered, unscaled) of the
+    right tail of the distribution.
+
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    q : float
+        Quantile to use for the anchor; must lie in the open interval (0.5, 1.0).
+
+    Returns
+    -------
+    rqw : float
+        The value of right quantile weight.
+
+    References
+    ----------
+    Brys, G.; Hubert, M.; Struyf, A. (2006).
+    Robust measures of tail weight.
+    Computational Statistics and Data Analysis 50(3), 733-759.
+    """
+    min_q, max_q = 0.5, 1.0
+    if q <= min_q or q >= max_q:
+        msg = "Parameter q should be in range (0.5, 1.0)."
+        raise ValueError(msg)
+    lower_quantile, q075, upper_quantile = np.nanquantile(
+        x, [1 - q * 0.5, 0.75, (1 + q) * 0.5]
+    )
+    return (lower_quantile + upper_quantile - 2 * q075) / (
+        lower_quantile - upper_quantile
+    )
diff --git a/tests/test_dispersion.py b/tests/test_dispersion.py
@@ -78,8 +78,8 @@ def test_mock_aggregation_functions(
 def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None:
     """Testing for result correctness."""
     rng = np.random.default_rng(seed)
-    low_disp = np.round(rng.exponential(scale=1, size=99) + 1, 2)
-    high_disp = np.round(rng.exponential(scale=10, size=99) + 1, 2)
+    low_disp = np.round(rng.exponential(scale=1, size=100) + 1, 2)
+    high_disp = np.round(rng.exponential(scale=10, size=100) + 1, 2)
     low_disp_res = func(low_disp)
     high_disp_res = func(high_disp)
     if low_disp_res > high_disp_res:

diff --git a/tests/test_kurtosis.py b/tests/test_kurtosis.py
@@ -13,11 +13,9 @@
     crow_siddiqui_kurt,
     hogg_kurt,
     l_kurt,
-    left_quantile_weight,
     moors_kurt,
     moors_octile_kurt,
     reza_ma_kurt,
-    right_quantile_weight,
     schmid_trede_peakedness,
     staudte_kurt,
 )
@@ -26,11 +24,9 @@
     crow_siddiqui_kurt,
     hogg_kurt,
     l_kurt,
-    left_quantile_weight,
     moors_kurt,
     moors_octile_kurt,
     reza_ma_kurt,
-    right_quantile_weight,
     schmid_trede_peakedness,
     staudte_kurt,
 ]
@@ -53,14 +49,10 @@ def test_mock_aggregation_functions(
 def test_kurt_sensibility(func: typing.Callable, seed: int) -> None:
     """Testing for result correctness."""
     rng = np.random.default_rng(seed)
-    platy = rng.uniform(size=99)
-    lepto = rng.laplace(size=99)
+    platy = rng.uniform(size=100)
+    lepto = rng.laplace(size=100)
     platy_res = func(platy)
     lepto_res = func(lepto)
-    if func.__name__ == "right_quantile_weight":
-        # ugly but more harmonized this way
-        platy_res = -platy_res
-        lepto_res = -lepto_res
     if platy_res > lepto_res:
         msg = (
             f"Kurtosis in the first case should be lower, got {platy_res} > {lepto_res}"
@@ -76,14 +68,6 @@ def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) ->
         raise ValueError(msg)
 
 
-@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight])
-@pytest.mark.parametrize("q", [0.0, 1.0])
-def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None:
-    """Simple tets case for correctnes of q."""
-    with pytest.raises(ValueError, match="Parameter q should be in range"):
-        func(x_array_float, q=q)
-
-
 @given(
     arrays(
         dtype=np.float64,

diff --git a/tests/test_skewness.py b/tests/test_skewness.py
@@ -19,9 +19,11 @@
     hossain_adnan_skew,
     kelly_skew,
     l_skew,
+    left_quantile_weight,
     medeen_skew,
     pearson_median_skew,
     pearson_mode_skew,
+    right_quantile_weight,
     wauc_skew_gamma,
 )
 
@@ -35,9 +37,11 @@
     hossain_adnan_skew,
     kelly_skew,
     l_skew,
+    left_quantile_weight,
     medeen_skew,
     pearson_median_skew,
     pearson_mode_skew,
+    right_quantile_weight,
     wauc_skew_gamma,
 ]
 
@@ -59,10 +63,15 @@ def test_mock_aggregation_functions(
 def test_skew_sensibility(func: typing.Callable, seed: int) -> None:
     """Testing for result correctness."""
     rng = np.random.default_rng(seed)
-    no_skew = np.round(rng.normal(size=99), 2)
-    left_skew = np.round(rng.exponential(size=99) + 1, 2)
+    # round for the mode estimators to work properly
+    no_skew = np.round(rng.uniform(size=100), 2)
+    left_skew = np.round(rng.exponential(size=100) + 1, 2)
     no_skew_res = func(no_skew)
     left_skew_res = func(left_skew)
+    if func.__name__ == "right_quantile_weight":
+        # ugly but more harmonized this way
+        no_skew_res = -no_skew_res
+        left_skew_res = -left_skew_res
     if no_skew_res > left_skew_res:
         msg = (
             f"Skewness in the first case should be lower, "
@@ -88,6 +97,14 @@ def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) ->
         raise ValueError(msg)
 
 
+@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight])
+@pytest.mark.parametrize("q", [0.0, 1.0])
+def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None:
+    """Simple test case for correctness of q."""
+    with pytest.raises(ValueError, match="Parameter q should be in range"):
+        func(x_array_float, q=q)
+
+
 @given(
     arrays(
         dtype=np.float64,

diff --git a/tests/test_variation.py b/tests/test_variation.py
@@ -49,8 +49,8 @@ def test_mock_variation_functions(
 def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None:
     """Testing for result correctness."""
     rng = np.random.default_rng(seed)
-    low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=99)
-    high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=99)
+    low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=100)
+    high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=100)
     low_var_res = func(low_var)
     high_var_res = func(high_var)
     if low_var_res < high_var_res: