
Implement forecast decomposition for SMA-based models #1180

Merged 17 commits on Mar 29, 2023
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
- Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
@@ -29,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add optional parameter `ts` into `forecast` method of pipelines ([#1071](https://github.com/tinkoff-ai/etna/pull/1071))
- Add tests on `transform` method of transforms on subset of segments, on new segments, on future with gap ([#1094](https://github.com/tinkoff-ai/etna/pull/1094))
- Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
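For readers of the changelog entry above, a minimal usage sketch of the new flag. The dataset `ts`, horizon, and model parameters are illustrative, not taken from this PR, and the `make_future`/`forecast` calls reflect the usual etna interface for context-required models:

```python
from etna.models import SeasonalMovingAverageModel

# Assumes `ts` is an existing TSDataset with enough history for the context.
model = SeasonalMovingAverageModel(window=2, seasonality=7)
model.fit(ts)

# Context-required models need `tail_steps=model.context_size` in make_future.
future = ts.make_future(future_steps=14, tail_steps=model.context_size)
forecast = model.forecast(ts=future, prediction_size=14, return_components=True)

# Components are lagged targets divided by the window,
# e.g. "target_component_lag_7" and "target_component_lag_14" here.
print(forecast.target_components_names)
print(forecast.get_target_components().head())
```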
2 changes: 1 addition & 1 deletion etna/datasets/tsdataset.py
@@ -1166,7 +1166,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
)

components_sum = target_components_df.sum(axis=1, level="segment")
if not np.array_equal(components_sum.values, self[..., "target"].values):
if not np.allclose(components_sum.values, self[..., "target"].values):
raise ValueError("Components don't sum up to target!")

self._target_components_names = components_names
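The switch from `np.array_equal` to `np.allclose` above is needed because the components come out of floating-point division and may not reconstruct the target bit-for-bit. A small standalone illustration with made-up values:

```python
import numpy as np

# Splitting a target into three equal thirds and summing them back
# reproduces it only up to rounding error.
target = np.array([1.0, 2.0, 4.0])
components = np.stack([target / 3] * 3, axis=1)
reconstructed = components.sum(axis=1)

print(np.array_equal(reconstructed, target))  # False here: bit-for-bit comparison fails
print(np.allclose(reconstructed, target))     # True: equal within tolerance
```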
33 changes: 27 additions & 6 deletions etna/models/seasonal_ma.py
@@ -81,10 +81,27 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
"Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
)

def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Estimate forecast components."""
from etna.transforms import LagTransform

self._validate_context(df=df, prediction_size=prediction_size)

lag_transform = LagTransform(
in_column="target",
lags=list(range(self.seasonality, self.context_size + 1, self.seasonality)),
out_column="target_component_lag",
)
target_components_df = lag_transform._transform(df) / self.window
target_components_df = target_components_df.iloc[-prediction_size:]
target_components_df = target_components_df.drop(columns=["target"], level="feature")
return target_components_df

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Make autoregressive forecasts on a wide dataframe."""
self._validate_context(df=df, prediction_size=prediction_size)

df = df.copy()
expected_length = prediction_size + self.context_size
history = df.loc[:, pd.IndexSlice[:, "target"]].values
history = history[-expected_length:-prediction_size]
@@ -128,18 +145,21 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._forecast(df=df, prediction_size=prediction_size)
ts.df = new_df

if return_components:
df.loc[-prediction_size:, pd.IndexSlice[:, "target"]] = new_df.loc[:, pd.IndexSlice[:, "target"]]
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts

def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Make predictions on a wide dataframe using true values as autoregression context."""
self._validate_context(df=df, prediction_size=prediction_size)

df = df.copy()
expected_length = prediction_size + self.context_size
context = df.loc[:, pd.IndexSlice[:, "target"]].values
context = context[-expected_length:]
@@ -183,12 +203,13 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._predict(df=df, prediction_size=prediction_size)
ts.df = new_df

if return_components:
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts


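As a plain-numpy sanity check on the `_predict_components` logic above: each component is the target lagged by a multiple of the seasonality and divided by the window, so the components sum to the seasonal-moving-average forecast. The series and parameters below are illustrative only:

```python
import numpy as np

# Seasonal moving average with window=2, seasonality=7:
#   forecast(t) = (y(t - 7) + y(t - 14)) / 2
window, seasonality = 2, 7
history = np.arange(1.0, 29.0)  # 28 past observations of a single series

lags = [k * seasonality for k in range(1, window + 1)]  # [7, 14]
components = {f"target_component_lag_{lag}": history[-lag] / window for lag in lags}
point_forecast = sum(components.values())

print(components)      # {'target_component_lag_7': 11.0, 'target_component_lag_14': 7.5}
print(point_forecast)  # 18.5 == (22 + 15) / 2
```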
2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
@@ -224,7 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):

@pytest.fixture
def inconsistent_target_components_values_df(target_components_df):
target_components_df.loc[10, pd.IndexSlice["1", "target_component_a"]] = 100
target_components_df.loc[target_components_df.index[-1], pd.IndexSlice["1", "target_component_a"]] = 100
return target_components_df


60 changes: 60 additions & 0 deletions tests/test_models/test_simple_models.py
@@ -729,3 +729,63 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
)
def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize(
"window, seasonality, expected_components_names",
((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
)
def test_sma_model_predict_components_correct_names(
example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(example_tsds)
to_call = getattr(model, method_name)
forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
assert sorted(forecast.target_components_names) == sorted(expected_components_names)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize("window", (1, 3, 5))
@pytest.mark.parametrize("seasonality", (1, 7, 14))
def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(example_tsds)
to_call = getattr(model, method_name)
forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)

target = forecast.to_pandas(features=["target"])
target_components_df = forecast.get_target_components()
np.testing.assert_array_almost_equal(
target.values, target_components_df.sum(axis=1, level="segment").values, decimal=10
)


@pytest.fixture
def simple_ts() -> TSDataset:
periods = 100
timestamp = pd.date_range("2020-01-01", periods=periods)
df1 = pd.DataFrame({"timestamp": timestamp, "segment": "segment_1", "target": np.arange(1, periods + 1)})
df2 = pd.DataFrame({"timestamp": timestamp, "segment": "segment_2", "target": 100 + np.arange(1, periods + 1)})
df = pd.concat([df1, df2]).reset_index(drop=True)
df = TSDataset.to_dataset(df)
tsds = TSDataset(df, freq="D")

return tsds


@pytest.mark.parametrize(
"method_name, expected_values",
(("forecast", [[96, 196], [97, 197], [96, 196]]), ("predict", [[96, 196], [97, 197], [98, 198]])),
)
def test_sma_model_predict_components_correct(
simple_ts, method_name, expected_values, window=1, seasonality=2, horizon=3
):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(simple_ts)
to_call = getattr(model, method_name)
forecast = to_call(ts=simple_ts, prediction_size=horizon, return_components=True)

target_components_df = forecast.get_target_components()
np.testing.assert_array_almost_equal(target_components_df.values, expected_values, decimal=10)
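
For reference, the expected values in the last test can be reproduced by hand: with window=1 and seasonality=2 the single component is the target lagged by 2 (divided by window=1); `predict` always takes true values as context, while `forecast` is autoregressive and reuses its own step-1 value at step 3. A sketch of that arithmetic for segment_1 (segment_2 is the same series shifted by 100); the helper variables are illustrative, not part of the test:

```python
import numpy as np

history = list(np.arange(1.0, 101.0))  # segment_1 target: 1..100
horizon, lag = 3, 2                    # window=1, seasonality=2 -> single lag-2 component

# predict: lag-2 always points at true targets -> [96.0, 97.0, 98.0]
predict_components = [history[len(history) - horizon + step - lag] for step in range(horizon)]

# forecast: autoregressive, so later steps may look up already-forecast values
extended = history.copy()
forecast_components = []
for step in range(horizon):
    value = extended[len(history) - horizon + step - lag]
    forecast_components.append(value)
    extended[len(history) - horizon + step] = value  # overwrite with the forecast
# -> [96.0, 97.0, 96.0]

print(predict_components, forecast_components)
```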