
Implement forecast decomposition for SMA-based models #1180

Merged 17 commits on Mar 29, 2023
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
- Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
@@ -29,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add optional parameter `ts` into `forecast` method of pipelines ([#1071](https://github.com/tinkoff-ai/etna/pull/1071))
- Add tests on `transform` method of transforms on subset of segments, on new segments, on future with gap ([#1094](https://github.com/tinkoff-ai/etna/pull/1094))
- Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
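For readers of the changelog entry above, a minimal usage sketch of the new flag. The dataset `ts`, horizon, and model parameters are illustrative, not taken from this PR, and the `make_future`/`forecast` calls reflect the usual etna interface for context-required models:

```python
from etna.models import SeasonalMovingAverageModel

# Assumes `ts` is an existing TSDataset with enough history for the context.
model = SeasonalMovingAverageModel(window=2, seasonality=7)
model.fit(ts)

# Context-required models need `tail_steps=model.context_size` in make_future.
future = ts.make_future(future_steps=14, tail_steps=model.context_size)
forecast = model.forecast(ts=future, prediction_size=14, return_components=True)

# Components are lagged targets divided by the window,
# e.g. "target_component_lag_7" and "target_component_lag_14" here.
print(forecast.target_components_names)
print(forecast.get_target_components().head())
```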
2 changes: 1 addition & 1 deletion etna/datasets/tsdataset.py
@@ -1166,7 +1166,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
)

components_sum = target_components_df.sum(axis=1, level="segment")
if not np.array_equal(components_sum.values, self[..., "target"].values):
if not np.allclose(components_sum.values, self[..., "target"].values):
raise ValueError("Components don't sum up to target!")

self._target_components_names = components_names
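The switch from `np.array_equal` to `np.allclose` above is needed because the components come out of floating-point division and may not reconstruct the target bit-for-bit. A small standalone illustration with made-up values:

```python
import numpy as np

# Splitting a target into three equal thirds and summing them back
# reproduces it only up to rounding error.
target = np.array([1.0, 2.0, 4.0])
components = np.stack([target / 3] * 3, axis=1)
reconstructed = components.sum(axis=1)

print(np.array_equal(reconstructed, target))  # False here: bit-for-bit comparison fails
print(np.allclose(reconstructed, target))     # True: equal within tolerance
```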
33 changes: 27 additions & 6 deletions etna/models/seasonal_ma.py
@@ -81,10 +81,27 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
"Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
)

def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Estimate forecast components."""
from etna.transforms import LagTransform

self._validate_context(df=df, prediction_size=prediction_size)

lag_transform = LagTransform(
in_column="target",
lags=list(range(self.seasonality, self.context_size + 1, self.seasonality)),
out_column="target_component_lag",
)
target_components_df = lag_transform._transform(df) / self.window
target_components_df = target_components_df.iloc[-prediction_size:]
target_components_df = target_components_df.drop(columns=["target"], level="feature")
return target_components_df

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Make autoregressive forecasts on a wide dataframe."""
self._validate_context(df=df, prediction_size=prediction_size)

df = df.copy()
expected_length = prediction_size + self.context_size
history = df.loc[:, pd.IndexSlice[:, "target"]].values
history = history[-expected_length:-prediction_size]
@@ -128,18 +145,21 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._forecast(df=df, prediction_size=prediction_size)
ts.df = new_df

if return_components:
df.loc[-prediction_size:, pd.IndexSlice[:, "target"]] = new_df.loc[:, pd.IndexSlice[:, "target"]]
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts

def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Make predictions on a wide dataframe using true values as autoregression context."""
self._validate_context(df=df, prediction_size=prediction_size)

df = df.copy()
expected_length = prediction_size + self.context_size
context = df.loc[:, pd.IndexSlice[:, "target"]].values
context = context[-expected_length:]
@@ -183,12 +203,13 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._predict(df=df, prediction_size=prediction_size)
ts.df = new_df

if return_components:
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts


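As a plain-numpy sanity check on the `_predict_components` logic above: each component is the target lagged by a multiple of the seasonality and divided by the window, so the components sum to the seasonal-moving-average forecast. The series and parameters below are illustrative only:

```python
import numpy as np

# Seasonal moving average with window=2, seasonality=7:
#   forecast(t) = (y(t - 7) + y(t - 14)) / 2
window, seasonality = 2, 7
history = np.arange(1.0, 29.0)  # 28 past observations of a single series

lags = [k * seasonality for k in range(1, window + 1)]  # [7, 14]
components = {f"target_component_lag_{lag}": history[-lag] / window for lag in lags}
point_forecast = sum(components.values())

print(components)      # {'target_component_lag_7': 11.0, 'target_component_lag_14': 7.5}
print(point_forecast)  # 18.5 == (22 + 15) / 2
```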
2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
@@ -224,7 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):

@pytest.fixture
def inconsistent_target_components_values_df(target_components_df):
target_components_df.loc[10, pd.IndexSlice["1", "target_component_a"]] = 100
target_components_df.loc[target_components_df.index[-1], pd.IndexSlice["1", "target_component_a"]] = 100
return target_components_df


60 changes: 60 additions & 0 deletions tests/test_models/test_simple_models.py
@@ -729,3 +729,63 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
)
def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize(
"window, seasonality, expected_components_names",
((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
)
def test_sma_model_predict_components_correct_names(
example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(example_tsds)
to_call = getattr(model, method_name)
forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
assert sorted(forecast.target_components_names) == sorted(expected_components_names)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize("window", (1, 3, 5))
@pytest.mark.parametrize("seasonality", (1, 7, 14))
def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(example_tsds)
to_call = getattr(model, method_name)
forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)

target = forecast.to_pandas(features=["target"])
target_components_df = forecast.get_target_components()
np.testing.assert_array_almost_equal(
target.values, target_components_df.sum(axis=1, level="segment").values, decimal=10
)


@pytest.fixture
def simple_ts() -> TSDataset:
periods = 100
timestamp = pd.date_range("2020-01-01", periods=periods)
df1 = pd.DataFrame({"timestamp": timestamp, "segment": "segment_1", "target": np.arange(1, periods + 1)})
df2 = pd.DataFrame({"timestamp": timestamp, "segment": "segment_2", "target": 100 + np.arange(1, periods + 1)})
df = pd.concat([df1, df2]).reset_index(drop=True)
df = TSDataset.to_dataset(df)
tsds = TSDataset(df, freq="D")

return tsds


@pytest.mark.parametrize(
"method_name, expected_values",
(("forecast", [[96, 196], [97, 197], [96, 196]]), ("predict", [[96, 196], [97, 197], [98, 198]])),
)
def test_sma_model_predict_components_correct(
simple_ts, method_name, expected_values, window=1, seasonality=2, horizon=3
):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(simple_ts)
to_call = getattr(model, method_name)
forecast = to_call(ts=simple_ts, prediction_size=horizon, return_components=True)

target_components_df = forecast.get_target_components()
np.testing.assert_array_almost_equal(target_components_df.values, expected_values, decimal=10)
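
For reference, the expected values in the last test can be reproduced by hand: with window=1 and seasonality=2 the single component is the target lagged by 2 (divided by window=1); `predict` always takes true values as context, while `forecast` is autoregressive and reuses its own step-1 value at step 3. A sketch of that arithmetic for segment_1 (segment_2 is the same series shifted by 100); the helper variables are illustrative, not part of the test:

```python
import numpy as np

history = list(np.arange(1.0, 101.0))  # segment_1 target: 1..100
horizon, lag = 3, 2                    # window=1, seasonality=2 -> single lag-2 component

# predict: lag-2 always points at true targets -> [96.0, 97.0, 98.0]
predict_components = [history[len(history) - horizon + step - lag] for step in range(horizon)]

# forecast: autoregressive, so later steps may look up already-forecast values
extended = history.copy()
forecast_components = []
for step in range(horizon):
    value = extended[len(history) - horizon + step - lag]
    forecast_components.append(value)
    extended[len(history) - horizon + step] = value  # overwrite with the forecast
# -> [96.0, 97.0, 96.0]

print(predict_components, forecast_components)
```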