Skip to content

Wape metric #1085

Merged
merged 5 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
-
-
- Add `WAPE` metric & `wape` functional metric ([#1085](https://github.com/tinkoff-ai/etna/pull/1085))
-
-
-
-
### Changed
Expand Down
2 changes: 2 additions & 0 deletions etna/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from etna.metrics.functional_metrics import rmse
from etna.metrics.functional_metrics import sign
from etna.metrics.functional_metrics import smape
from etna.metrics.functional_metrics import wape
looopka marked this conversation as resolved.
Show resolved Hide resolved
from etna.metrics.intervals_metrics import Coverage
from etna.metrics.intervals_metrics import Width
from etna.metrics.metrics import MAE
Expand All @@ -20,6 +21,7 @@
from etna.metrics.metrics import R2
from etna.metrics.metrics import RMSE
from etna.metrics.metrics import SMAPE
from etna.metrics.metrics import WAPE
from etna.metrics.metrics import MaxDeviation
from etna.metrics.metrics import MedAE
from etna.metrics.metrics import Sign
Expand Down
31 changes: 31 additions & 0 deletions etna/metrics/functional_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,34 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike) -> float:


# RMSE as a ready-made callable: ``mse`` with the keyword ``squared=False`` pre-bound via ``partial``.
rmse = partial(mse, squared=False)


def wape(y_true: ArrayLike, y_pred: ArrayLike) -> float:
    """Weighted Average Percentage Error metric.

    .. math::
        WAPE(y\\_true, y\\_pred) = \\frac{\\sum_{i=1}^{n} |y\\_true_i - y\\_pred_i|}{\\sum_{i=1}^{n} |y\\_true_i|}

    Both sums run over every element of the inputs, so 2-D inputs are reduced to a single number.

    Parameters
    ----------
    y_true:
        array-like of shape (n_samples,) or (n_samples, n_outputs)

        Ground truth (correct) target values.

    y_pred:
        array-like of shape (n_samples,) or (n_samples, n_outputs)

        Estimated target values.

    Returns
    -------
    float
        A floating point value (the best value is 0.0).

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have exactly the same shape.

    Notes
    -----
    A zero denominator is not handled specially: when the absolute values of ``y_true`` sum to zero,
    the result is whatever numpy division produces (``inf`` or ``nan``).
    """
    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)

    # Compare the full shapes, not only the number of dimensions: inputs of equal rank but
    # different shape, e.g. (3,) vs (1,), would otherwise be silently broadcast by numpy.
    if y_true_array.shape != y_pred_array.shape:
        raise ValueError("Shapes of the labels must be the same")

    return np.sum(np.abs(y_true_array - y_pred_array)) / np.sum(np.abs(y_true_array))
31 changes: 30 additions & 1 deletion etna/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from etna.metrics import rmse
from etna.metrics import sign
from etna.metrics import smape
from etna.metrics import wape
from etna.metrics.base import Metric
from etna.metrics.base import MetricAggregationMode

Expand Down Expand Up @@ -302,4 +303,32 @@ def greater_is_better(self) -> bool:
return False


__all__ = ["MAE", "MSE", "RMSE", "R2", "MSLE", "MAPE", "SMAPE", "MedAE", "Sign", "MaxDeviation"]
class WAPE(Metric):
    """Weighted Average Percentage Error metric with multi-segment computation support.

    .. math::
        WAPE(y\\_true, y\\_pred) = \\frac{\\sum_{i=1}^{n} |y\\_true_i - y\\_pred_i|}{\\sum_{i=1}^{n} |y\\_true_i|}

    Notes
    -----
    You can read more about logic of multi-segment metrics in Metric docs.
    """

    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
        """Init metric.

        Parameters
        ----------
        mode: 'macro' or 'per-segment'
            metrics aggregation mode
        kwargs:
            metric's computation arguments
        """
        super().__init__(mode=mode, metric_fn=wape, **kwargs)

    @property
    def greater_is_better(self) -> bool:
        """Whether higher metric value is better."""
        return False


# Public names of this module, i.e. what a star-import of it exports.
__all__ = ["MAE", "MSE", "RMSE", "R2", "MSLE", "MAPE", "SMAPE", "MedAE", "Sign", "MaxDeviation", "WAPE"]
3 changes: 3 additions & 0 deletions tests/test_metrics/test_functional_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from etna.metrics import rmse
from etna.metrics import sign
from etna.metrics import smape
from etna.metrics import wape


@pytest.fixture()
Expand Down Expand Up @@ -39,6 +40,7 @@ def y_pred_1d():
(r2_score, 0),
(sign, -1),
(max_deviation, 2),
(wape, 1),
),
)
def test_all_1d_metrics(metric, right_metrics_value, y_true_1d, y_pred_1d):
Expand Down Expand Up @@ -74,6 +76,7 @@ def y_pred_2d():
(r2_score, 0.0),
(sign, -1),
(max_deviation, 4),
(wape, 1),
),
)
def test_all_2d_metrics(metric, right_metrics_value, y_true_2d, y_pred_2d):
Expand Down
19 changes: 12 additions & 7 deletions tests/test_metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from etna.metrics import rmse
from etna.metrics import sign
from etna.metrics import smape
from etna.metrics import wape
from etna.metrics.base import MetricAggregationMode
from etna.metrics.metrics import MAE
from etna.metrics.metrics import MAPE
Expand All @@ -20,6 +21,7 @@
from etna.metrics.metrics import R2
from etna.metrics.metrics import RMSE
from etna.metrics.metrics import SMAPE
from etna.metrics.metrics import WAPE
from etna.metrics.metrics import MaxDeviation
from etna.metrics.metrics import MedAE
from etna.metrics.metrics import Sign
Expand All @@ -41,6 +43,7 @@
(Sign, "Sign", {}, ""),
(MaxDeviation, "MaxDeviation", {}, ""),
(DummyMetric, "DummyMetric", {"alpha": 1.0}, "alpha = 1.0, "),
(WAPE, "WAPE", {}, ""),
),
)
def test_repr(metric_class, metric_class_repr, metric_params, param_repr):
Expand All @@ -56,7 +59,7 @@ def test_repr(metric_class, metric_class_repr, metric_params, param_repr):

@pytest.mark.parametrize(
"metric_class",
(MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation),
(MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, WAPE),
)
def test_name_class_name(metric_class):
"""Check metrics name property without changing its during inheritance"""
Expand All @@ -80,7 +83,7 @@ def test_name_repr(metric_class):
assert metric_name == true_name


@pytest.mark.parametrize("metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation))
@pytest.mark.parametrize("metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, WAPE))
def test_metrics_macro(metric_class, train_test_dfs):
"""Check metrics interface in 'macro' mode"""
forecast_df, true_df = train_test_dfs
Expand All @@ -90,7 +93,7 @@ def test_metrics_macro(metric_class, train_test_dfs):


@pytest.mark.parametrize(
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric)
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
)
def test_metrics_per_segment(metric_class, train_test_dfs):
"""Check metrics interface in 'per-segment' mode"""
Expand All @@ -103,7 +106,7 @@ def test_metrics_per_segment(metric_class, train_test_dfs):


@pytest.mark.parametrize(
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric)
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
)
def test_metrics_invalid_aggregation(metric_class):
"""Check metrics behavior in case of invalid aggregation mode"""
Expand All @@ -112,7 +115,7 @@ def test_metrics_invalid_aggregation(metric_class):


@pytest.mark.parametrize(
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric)
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
)
def test_invalid_timestamps(metric_class, two_dfs_with_different_timestamps):
"""Check metrics behavior in case of invalid timeranges"""
Expand All @@ -123,7 +126,7 @@ def test_invalid_timestamps(metric_class, two_dfs_with_different_timestamps):


@pytest.mark.parametrize(
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric)
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
)
def test_invalid_segments(metric_class, two_dfs_with_different_segments_sets):
"""Check metrics behavior in case of invalid segments sets"""
Expand All @@ -134,7 +137,7 @@ def test_invalid_segments(metric_class, two_dfs_with_different_segments_sets):


@pytest.mark.parametrize(
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric)
"metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
)
def test_invalid_segments_target(metric_class, train_test_dfs):
"""Check metrics behavior in case of no target column in segment"""
Expand All @@ -159,6 +162,7 @@ def test_invalid_segments_target(metric_class, train_test_dfs):
(Sign, sign),
(MaxDeviation, max_deviation),
(DummyMetric, create_dummy_functional_metric()),
(WAPE, wape),
),
)
def test_metrics_values(metric_class, metric_fn, train_test_dfs):
Expand Down Expand Up @@ -191,6 +195,7 @@ def test_metrics_values(metric_class, metric_fn, train_test_dfs):
(Sign(), None),
(MaxDeviation(), False),
(DummyMetric(), False),
(WAPE(), False),
),
)
def test_metrics_greater_is_better(metric, greater_is_better):
Expand Down