Fix BasePipeline.forecast when prediction intervals are estimated on history data with presence of NaNs (#1291)
tinkoff-ai · Jun 20, 2023 · 3b0027f · 3b0027f · github-actions · Jun 20, 2023
1 parent f4509bb
commit 3b0027f
Show file tree

Hide file tree

Showing 4 changed files with 72 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fix problems with flake8 B023 ([#1252](https://github.com/tinkoff-ai/etna/pull/1252))
 - Fix problem with swapped forecast methods in HierarchicalPipeline ([#1259](https://github.com/tinkoff-ai/etna/pull/1259))
 - Fix problem with segment name "target" in `StackingEnsemble` ([#1262](https://github.com/tinkoff-ai/etna/pull/1262))
+- Fix `BasePipeline.forecast` when prediction intervals are estimated on history data with presence of NaNs ([#1291](https://github.com/tinkoff-ai/etna/pull/1291))
 
 ## [2.0.0] - 2023-04-11
 ### Added

diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py
@@ -1,4 +1,5 @@
 import math
+import warnings
 from abc import abstractmethod
 from copy import deepcopy
 from enum import Enum
@@ -23,7 +24,6 @@
 from etna.core import BaseMixin
 from etna.datasets import TSDataset
 from etna.loggers import tslogger
-from etna.metrics import MAE
 from etna.metrics import Metric
 from etna.metrics import MetricAggregationMode
 
@@ -283,6 +283,29 @@ class FoldParallelGroup(TypedDict):
     forecast_masks: List[FoldMask]
 
 
+class _DummyMetric(Metric):
+    """Dummy metric that is created only for implementation of BasePipeline._forecast_prediction_interval."""
+
+    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+        super().__init__(mode=mode, metric_fn=self._compute_metric, **kwargs)
+
+    @staticmethod
+    def _compute_metric(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        return 0.0
+
+    @property
+    def greater_is_better(self) -> bool:
+        return False
+
+    def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[str, float]]:
+        segments = set(y_true.df.columns.get_level_values("segment"))
+        metrics_per_segment = {}
+        for segment in segments:
+            metrics_per_segment[segment] = 0.0
+        metrics = self._aggregate_metrics(metrics_per_segment)
+        return metrics
+
+
 class BasePipeline(AbstractPipeline, BaseMixin):
     """Base class for pipeline."""
 
@@ -315,12 +338,31 @@ def _forecast_prediction_interval(
     ) -> TSDataset:
         """Add prediction intervals to the forecasts."""
         with tslogger.disable():
-            _, forecasts, _ = self.backtest(ts=ts, metrics=[MAE()], n_folds=n_folds)
+            _, forecasts, _ = self.backtest(ts=ts, metrics=[_DummyMetric()], n_folds=n_folds)
 
         self._add_forecast_borders(ts=ts, backtest_forecasts=forecasts, quantiles=quantiles, predictions=predictions)
 
         return predictions
 
+    @staticmethod
+    def _validate_residuals_for_interval_estimation(backtest_forecasts: TSDataset, residuals: pd.DataFrame):
+        len_backtest, num_segments = residuals.shape
+        min_timestamp = backtest_forecasts.index.min()
+        max_timestamp = backtest_forecasts.index.max()
+        non_nan_counts = np.sum(~np.isnan(residuals.values), axis=0)
+        if np.any(non_nan_counts < len_backtest):
+            warnings.warn(
+                f"There are NaNs in target on time span from {min_timestamp} to {max_timestamp}. "
+                f"It can obstruct prediction interval estimation on history data."
+            )
+        if np.any(non_nan_counts < 2):
+            raise ValueError(
+                f"There aren't enough target values to evaluate prediction intervals on history! "
+                f"For each segment there should be at least 2 points with defined value in a "
+                f"time span from {min_timestamp} to {max_timestamp}. "
+                f"You can try to increase n_folds parameter to make time span bigger."
+            )
+
     def _add_forecast_borders(
         self, ts: TSDataset, backtest_forecasts: pd.DataFrame, quantiles: Sequence[float], predictions: TSDataset
     ) -> None:
@@ -331,7 +373,9 @@ def _add_forecast_borders(
             - ts[backtest_forecasts.index.min() : backtest_forecasts.index.max(), :, "target"]
         )
 
-        sigma = np.std(residuals.values, axis=0)
+        self._validate_residuals_for_interval_estimation(backtest_forecasts=backtest_forecasts, residuals=residuals)
+        sigma = np.nanstd(residuals.values, axis=0)
+
         borders = []
         for quantile in quantiles:
             z_q = norm.ppf(q=quantile)

diff --git a/tests/test_pipeline/test_hierarchical_pipeline.py b/tests/test_pipeline/test_hierarchical_pipeline.py
@@ -412,9 +412,9 @@ def test_interval_metrics(product_level_constant_hierarchical_ts, metric_type, r
     results, _, _ = pipeline.backtest(
         ts=ts,
         metrics=[metric],
-        n_folds=2,
+        n_folds=1,
         aggregate_metrics=True,
-        forecast_params={"prediction_interval": True, "n_folds": 1},
+        forecast_params={"prediction_interval": True, "n_folds": 2},
     )
     np.testing.assert_allclose(results[metric.name], answer)
 

diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py
@@ -266,6 +266,28 @@ def test_forecast_prediction_interval_not_builtin(example_tsds, model):
         assert (segment_slice["target_0.975"] - segment_slice["target_0.025"] >= 0).all()
 
 
+@pytest.mark.parametrize("model", (MovingAverageModel(), LinearPerSegmentModel()))
+def test_forecast_prediction_interval_not_builtin_with_nans_warning(example_tsds, model):
+    example_tsds.df.loc[example_tsds.index[-2], pd.IndexSlice["segment_1", "target"]] = None
+
+    pipeline = Pipeline(model=model, transforms=[DateFlagsTransform()], horizon=5)
+    pipeline.fit(example_tsds)
+    with pytest.warns(UserWarning, match="There are NaNs in target on time span from .* to .*"):
+        _ = pipeline.forecast(prediction_interval=True, quantiles=[0.025, 0.975])
+
+
+@pytest.mark.parametrize("model", (MovingAverageModel(), LinearPerSegmentModel()))
+def test_forecast_prediction_interval_not_builtin_with_nans_error(example_tsds, model):
+    example_tsds.df.loc[example_tsds.index[-20:-1], pd.IndexSlice["segment_1", "target"]] = None
+
+    pipeline = Pipeline(model=model, transforms=[DateFlagsTransform()], horizon=5)
+    pipeline.fit(example_tsds)
+    with pytest.raises(
+        ValueError, match="There aren't enough target values to evaluate prediction intervals on history"
+    ):
+        _ = pipeline.forecast(prediction_interval=True, quantiles=[0.025, 0.975])
+
+
 def test_forecast_prediction_interval_correct_values(splited_piecewise_constant_ts):
     """Test that the prediction interval for piecewise-constant dataset is correct."""
     train, test = splited_piecewise_constant_ts