diff --git a/CHANGELOG.md b/CHANGELOG.md index 703f17aa3..8633d7920 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Method `forecast_components` for forecast decomposition in `_TBATSAdapter` ([#1125](https://github.com/tinkoff-ai/etna/issues/1125)) - Methods `forecast_components` and `predict_components` for forecast decomposition in `_CatBoostAdapter` ([#1135](https://github.com/tinkoff-ai/etna/issues/1135)) - Methods `forecast_components` and `predict_components` for forecast decomposition in `_HoltWintersAdapter ` ([#1146](https://github.com/tinkoff-ai/etna/issues/1146)) -- +- Method `predict_components` for forecast decomposition in `_ProphetAdapter` ([#1161](https://github.com/tinkoff-ai/etna/issues/1161)) +- ### Changed - Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809)) - Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110)) diff --git a/etna/models/prophet.py b/etna/models/prophet.py index 9b1b26148..92a973fcb 100644 --- a/etna/models/prophet.py +++ b/etna/models/prophet.py @@ -5,6 +5,7 @@ from typing import List from typing import Optional from typing import Sequence +from typing import Set from typing import Union import pandas as pd @@ -106,10 +107,7 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter": List of the columns with regressors """ self.regressor_columns = regressors - prophet_df = pd.DataFrame() - prophet_df["y"] = df["target"] - prophet_df["ds"] = df["timestamp"] - prophet_df[self.regressor_columns] = df[self.regressor_columns] + prophet_df = self._prepare_prophet_df(df=df) for regressor in self.regressor_columns: if regressor not in self.predefined_regressors_names: self.model.add_regressor(regressor) @@ -134,11 +132,7 @@ def predict(self, df: pd.DataFrame, 
prediction_interval: bool, quantiles: Sequen : DataFrame with predictions """ - df = df.reset_index() - prophet_df = pd.DataFrame() - prophet_df["y"] = df["target"] - prophet_df["ds"] = df["timestamp"] - prophet_df[self.regressor_columns] = df[self.regressor_columns] + prophet_df = self._prepare_prophet_df(df=df) forecast = self.model.predict(prophet_df) y_pred = pd.DataFrame(forecast["yhat"]) if prediction_interval: @@ -152,6 +146,86 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen y_pred = y_pred.rename(rename_dict, axis=1) return y_pred + def _prepare_prophet_df(self, df: pd.DataFrame) -> pd.DataFrame: + """Prepare dataframe for fit and predict.""" + if self.regressor_columns is None: + raise ValueError("List of regressor is not set!") + + df = df.reset_index() + + prophet_df = pd.DataFrame() + prophet_df["y"] = df["target"] + prophet_df["ds"] = df["timestamp"] + prophet_df[self.regressor_columns] = df[self.regressor_columns] + return prophet_df + + @staticmethod + def _filter_aggregated_components(components: Iterable[str]) -> Set[str]: + """Filter out aggregated components.""" + # aggregation of corresponding model terms, e.g. 
sum + aggregated_components = { + "additive_terms", + "multiplicative_terms", + "extra_regressors_additive", + "extra_regressors_multiplicative", + } + + return set(components) - aggregated_components + + def _check_mul_components(self): + """Raise error if model contains multiplicative components.""" + components_modes = self.model.component_modes + if components_modes is None: + raise ValueError("This model is not fitted!") + + mul_components = self._filter_aggregated_components(self.model.component_modes["multiplicative"]) + if len(mul_components) > 0: + raise ValueError("Forecast decomposition is only supported for additive components!") + + def _predict_seasonal_components(self, df: pd.DataFrame) -> pd.DataFrame: + """Estimate seasonal, holidays and exogenous components.""" + model = self.model + + seasonal_features, _, component_cols, _ = model.make_all_seasonality_features(df) + + holiday_names = set(model.train_holiday_names) if model.train_holiday_names is not None else set() + + components_names = list( + filter(lambda v: v not in holiday_names, self._filter_aggregated_components(component_cols.columns)) + ) + + beta_c = model.params["beta"].T * component_cols[components_names].values + comp = seasonal_features.values @ beta_c + + # apply rescaling for additive components + comp *= model.y_scale + + return pd.DataFrame(data=comp, columns=components_names) + + def predict_components(self, df: pd.DataFrame) -> pd.DataFrame: + """Estimate prediction components. 
+ + Parameters + ---------- + df: + features dataframe + + Returns + ------- + : + dataframe with prediction components + """ + self._check_mul_components() + + prophet_df = self._prepare_prophet_df(df=df) + + prophet_df = self.model.setup_dataframe(prophet_df) + + components = self._predict_seasonal_components(df=prophet_df) + components["trend"] = self.model.predict_trend(df=prophet_df) + + return components.add_prefix("target_component_") + def get_model(self) -> Prophet: """Get internal prophet.Prophet model that is used inside etna class. @@ -200,6 +274,12 @@ class ProphetModel( Original Prophet can use features 'cap' and 'floor', they should be added to the known_future list on dataset initialization. + This model supports in-sample and out-of-sample forecast decomposition. The number + of components in the decomposition depends on model parameters. Main components are: + trend, seasonality, holiday and exogenous effects. Seasonal components will be decomposed + down to individual periods if fitted. Holiday and exogenous will be present in decomposition + if fitted. Corresponding components are obtained directly from the model. 
+ Examples -------- >>> from etna.datasets import generate_periodic_df diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py index adf50b258..5ee3368d8 100644 --- a/tests/test_models/conftest.py +++ b/tests/test_models/conftest.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from etna.datasets import generate_ar_df @@ -16,3 +17,15 @@ def new_format_exog(): exog = generate_ar_df(periods=60, start_time="2021-06-01", n_segments=2) df = TSDataset.to_dataset(exog) return df + + +@pytest.fixture() +def dfs_w_exog(): + df = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1) + df["f1"] = np.sin(df["target"]) + df["f2"] = np.cos(df["target"]) + + df.drop(columns=["segment"], inplace=True) + train = df.iloc[:-5] + test = df.iloc[-5:] + return train, test diff --git a/tests/test_models/test_catboost.py b/tests/test_models/test_catboost.py index 102e4520d..f86815120 100644 --- a/tests/test_models/test_catboost.py +++ b/tests/test_models/test_catboost.py @@ -147,18 +147,6 @@ def test_save_load(model, example_tsds): assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=transforms, horizon=horizon) -@pytest.fixture() -def dfs_w_exog(): - df = generate_ar_df(start_time="2021-01-01", periods=105, n_segments=1) - df["f1"] = np.sin(df["target"]) - df["f2"] = np.cos(df["target"]) - - df.drop(columns=["segment"], inplace=True) - train = df.iloc[:-5] - test = df.iloc[-5:] - return train, test - - def test_forecast_components_equal_predict_components(dfs_w_exog): train, test = dfs_w_exog diff --git a/tests/test_models/test_prophet.py b/tests/test_models/test_prophet.py index 3724e3709..f90e6f07c 100644 --- a/tests/test_models/test_prophet.py +++ b/tests/test_models/test_prophet.py @@ -214,3 +214,154 @@ def test_custom_seasonality(custom_seasonality): model = ProphetModel(additional_seasonality_params=custom_seasonality) for seasonality in custom_seasonality: assert seasonality["name"] in 
model._base_model.model.seasonalities + + +@pytest.fixture +def prophet_dfs(dfs_w_exog): + df = pd.concat(dfs_w_exog, axis=0) + df["cap"] = 4.0 + + h1_mask = np.arange(len(df)) % 3 == 0 + h2_mask = np.arange(len(df)) % 5 == 0 + + h1 = pd.DataFrame( + { + "holiday": "h1", + "ds": df["timestamp"][h1_mask], + "lower_window": 0, + "upper_window": 1, + } + ) + + h2 = pd.DataFrame( + { + "holiday": "h2", + "ds": df["timestamp"][h2_mask], + "lower_window": 0, + "upper_window": 1, + } + ) + holidays = pd.concat([h1, h2]).reset_index(drop=True) + + return df.iloc[-60:-20], df.iloc[-20:], holidays + + +def test_check_mul_components_not_fitted_error(): + model = _ProphetAdapter() + with pytest.raises(ValueError, match="This model is not fitted!"): + model._check_mul_components() + + +def test_prepare_prophet_df_regressors_not_set_error(prophet_dfs): + _, test, _ = prophet_dfs + model = _ProphetAdapter() + with pytest.raises(ValueError, match="List of regressor is not set!"): + model._prepare_prophet_df(df=test) + + +@pytest.mark.parametrize( + "seasonality_mode,custom_seasonality", + ( + ("multiplicative", [{"name": "s1", "period": 14, "fourier_order": 1, "mode": "additive"}]), + ("multiplicative", []), + ("additive", [{"name": "s1", "period": 14, "fourier_order": 1, "mode": "multiplicative"}]), + ), +) +def test_check_mul_components(prophet_dfs, seasonality_mode, custom_seasonality): + _, test, _ = prophet_dfs + + model = _ProphetAdapter(seasonality_mode=seasonality_mode, additional_seasonality_params=custom_seasonality) + model.fit(df=test, regressors=["f1", "f2"]) + + with pytest.raises(ValueError, match="Forecast decomposition is only supported for additive components!"): + model.predict_components(df=test) + + +@pytest.mark.parametrize( + "regressors,regressors_comps", ((["f1", "f2", "cap"], ["target_component_f1", "target_component_f2"]), ([], [])) +) +@pytest.mark.parametrize( + "custom_seas,custom_seas_comp", + ( + ([{"name": "s1", "period": 14, "fourier_order": 1}], 
["target_component_s1"]), + ([], []), + ), +) +@pytest.mark.parametrize("use_holidays,holidays_comp", ((True, ["target_component_holidays"]), (False, []))) +@pytest.mark.parametrize("daily,daily_comp", ((True, ["target_component_daily"]), (False, []))) +@pytest.mark.parametrize("weekly,weekly_comp", ((True, ["target_component_weekly"]), (False, []))) +@pytest.mark.parametrize("yearly,yearly_comp", ((True, ["target_component_yearly"]), (False, []))) +def test_predict_components_names( + prophet_dfs, + regressors, + regressors_comps, + use_holidays, + holidays_comp, + daily, + daily_comp, + weekly, + weekly_comp, + yearly, + yearly_comp, + custom_seas, + custom_seas_comp, +): + _, test, holidays = prophet_dfs + + if not use_holidays: + holidays = None + + expected_columns = set( + regressors_comps + + holidays_comp + + daily_comp + + weekly_comp + + yearly_comp + + custom_seas_comp + + ["target_component_trend"] + ) + + model = _ProphetAdapter( + holidays=holidays, + daily_seasonality=daily, + weekly_seasonality=weekly, + yearly_seasonality=yearly, + additional_seasonality_params=custom_seas, + ) + model.fit(df=test, regressors=regressors) + + components = model.predict_components(df=test) + + assert set(components.columns) == expected_columns + + +@pytest.mark.long_1 +@pytest.mark.parametrize("growth,cap", (("linear", []), ("logistic", ["cap"]))) +@pytest.mark.parametrize("regressors", (["f1", "f2"], [])) +@pytest.mark.parametrize("custom_seas", ([{"name": "s1", "period": 14, "fourier_order": 1}], [])) +@pytest.mark.parametrize("use_holidays", (True, False)) +@pytest.mark.parametrize("daily", (True, False)) +@pytest.mark.parametrize("weekly", (True, False)) +@pytest.mark.parametrize("yearly", (True, False)) +def test_predict_components_sum_up_to_target( + prophet_dfs, regressors, use_holidays, daily, weekly, yearly, custom_seas, growth, cap +): + train, test, holidays = prophet_dfs + + if not use_holidays: + holidays = None + + model = _ProphetAdapter( + 
growth=growth, + holidays=holidays, + daily_seasonality=daily, + weekly_seasonality=weekly, + yearly_seasonality=yearly, + additional_seasonality_params=custom_seas, + ) + model.fit(df=train, regressors=regressors + cap) + + components = model.predict_components(df=test) + pred = model.predict(df=test, prediction_interval=False, quantiles=[]) + + np.testing.assert_allclose(np.sum(components, axis=1), pred["target"].values)