Skip to content

Commit

Permalink
Unification of errors, warnings and checks in models (#1312)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-Geekman authored Jul 17, 2023
1 parent 6a9b6a8 commit 7e54706
Show file tree
Hide file tree
Showing 21 changed files with 586 additions and 433 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
-
-
-
- Unify errors, warnings and checks in models ([#1312](https://github.com/tinkoff-ai/etna/pull/1312))
-

### Fixed
Expand Down
17 changes: 10 additions & 7 deletions etna/models/deadline_ma.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ def get_model(self) -> "DeadlineMovingAverageModel":
"""
return self

def _check_not_used_columns(self, ts: TSDataset):
    """Warn about feature columns of ``ts`` that are not used.

    Every value of the ``"feature"`` level of ``ts.columns`` other than ``"target"`` is
    reported in a single warning. Nothing is emitted when ``"target"`` is the only feature.

    Parameters
    ----------
    ts:
        Dataset whose ``columns`` expose a ``"feature"`` level
    """
    features = set(ts.columns.get_level_values("feature"))
    # Report the names as a sorted list: set iteration order is not guaranteed, so interpolating
    # the set itself makes the message differ from run to run. ``key=str`` keeps labels of
    # mixed types comparable.
    columns_not_used = sorted(features.difference({"target"}), key=str)
    if columns_not_used:
        warnings.warn(
            message="This model doesn't work with exogenous features. "
            f"Columns {columns_not_used} won't be used."
        )

def fit(self, ts: TSDataset) -> "DeadlineMovingAverageModel":
"""Fit model.
Expand All @@ -109,14 +118,8 @@ def fit(self, ts: TSDataset) -> "DeadlineMovingAverageModel":
if freq not in self._freqs_available:
raise ValueError(f"Freq {freq} is not supported! Use daily or hourly frequency!")

self._check_not_used_columns(ts)
self._freq = freq

columns = set(ts.columns.get_level_values("feature"))
if columns != {"target"}:
warnings.warn(
message=f"{type(self).__name__} does not work with any exogenous series or features. "
f"It uses only target series for predict/\n "
)
return self

@staticmethod
Expand Down
36 changes: 19 additions & 17 deletions etna/models/holt_winters.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,15 @@ def __init__(
self._last_train_timestamp: Optional[pd.Timestamp] = None
self._train_freq: Optional[str] = None

def _check_not_used_columns(self, df: pd.DataFrame):
    """Warn about columns of ``df`` that are not used.

    Every column other than ``"target"`` and ``"timestamp"`` is reported in a single warning.
    Nothing is emitted when no such column exists.

    Parameters
    ----------
    df:
        Data to inspect
    """
    # Report the names as a sorted list: set iteration order is not guaranteed, so interpolating
    # the set itself makes the message differ from run to run. ``key=str`` keeps labels of
    # mixed types comparable.
    columns_not_used = sorted(set(df.columns).difference({"target", "timestamp"}), key=str)
    if columns_not_used:
        warnings.warn(
            message="This model doesn't work with exogenous features. "
            f"Columns {columns_not_used} won't be used."
        )

def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_HoltWintersAdapter":
"""
Fit Holt-Winters' model.
Expand All @@ -217,8 +226,7 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_HoltWintersAdapter":
Fitted model
"""
self._train_freq = determine_freq(timestamps=df["timestamp"])

self._check_df(df)
self._check_not_used_columns(df)

targets = df["target"]
targets.index = df["timestamp"]
Expand Down Expand Up @@ -268,21 +276,11 @@ def predict(self, df: pd.DataFrame) -> np.ndarray:
"""
if self._result is None or self._model is None:
raise ValueError("This model is not fitted! Fit the model before calling predict method!")
self._check_df(df)

forecast = self._result.predict(start=df["timestamp"].min(), end=df["timestamp"].max())
y_pred = forecast.values
return y_pred

def _check_df(self, df: pd.DataFrame):
    """Warn about columns of ``df`` other than ``"target"`` and ``"timestamp"``.

    All such columns are reported in a single warning. Nothing is emitted when none exist.

    Parameters
    ----------
    df:
        Data to inspect
    """
    # Report the names as a sorted list: set iteration order is not guaranteed, so interpolating
    # the set itself makes the message differ from run to run. ``key=str`` keeps labels of
    # mixed types comparable.
    columns_not_used = sorted(set(df.columns).difference({"target", "timestamp"}), key=str)
    if columns_not_used:
        warnings.warn(
            message="This model does not work with exogenous features and regressors.\n "
            f"{columns_not_used} will be dropped"
        )

def get_model(self) -> HoltWintersResultsWrapper:
"""Get :py:class:`statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper` model that was fitted inside etna class.
Expand All @@ -303,7 +301,7 @@ def _check_mul_components(self):
if (model.trend is not None and model.trend == "mul") or (
model.seasonal is not None and model.seasonal == "mul"
):
raise ValueError("Forecast decomposition is only supported for additive components!")
raise NotImplementedError("Forecast decomposition is only supported for additive components!")

def _rescale_components(self, components: pd.DataFrame) -> pd.DataFrame:
"""Rescale components when Box-Cox transform used."""
Expand Down Expand Up @@ -335,15 +333,17 @@ def forecast_components(self, df: pd.DataFrame) -> pd.DataFrame:
raise ValueError("This model is not fitted!")

if df["timestamp"].min() <= self._last_train_timestamp:
raise ValueError("To estimate in-sample prediction decomposition use `predict` method.")
raise NotImplementedError(
"This model can't make forecast decomposition on history data! "
"Use method predict for in-sample prediction decomposition."
)

horizon = determine_num_steps(
start_timestamp=self._last_train_timestamp, end_timestamp=df["timestamp"].max(), freq=self._train_freq
)
horizon_steps = np.arange(1, horizon + 1)

self._check_mul_components()
self._check_df(df)

level = fit_result.level.values
trend = fit_result.trend.values
Expand Down Expand Up @@ -404,10 +404,12 @@ def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
raise ValueError("This model is not fitted!")

if df["timestamp"].min() < self._first_train_timestamp or df["timestamp"].max() > self._last_train_timestamp:
raise ValueError("To estimate out-of-sample prediction decomposition use `forecast` method.")
raise NotImplementedError(
"This model can't make prediction decomposition on future out-of-sample data! "
"Use method forecast for future out-of-sample prediction decomposition."
)

self._check_mul_components()
self._check_df(df)

level = fit_result.level.values
trend = fit_result.trend.values
Expand Down
5 changes: 3 additions & 2 deletions etna/models/nn/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,12 +270,13 @@ def _is_prediction_with_gap(self, ts: TSDataset, horizon: int) -> bool:
def _make_target_prediction(self, ts: TSDataset, horizon: int) -> Tuple[TSDataset, DataLoader]:
if self._is_in_sample_prediction(ts=ts, horizon=horizon):
raise NotImplementedError(
"It is not possible to make in-sample predictions with DeepAR model! "
"In-sample predictions aren't supported by current implementation."
"This model can't make forecast on history data! "
"In-sample forecast isn't supported by current implementation."
)
elif self._is_prediction_with_gap(ts=ts, horizon=horizon):
first_prediction_timestamp = self._get_first_prediction_timestamp(ts=ts, horizon=horizon)
raise NotImplementedError(
"This model can't make forecast on out-of-sample data that goes after training data with a gap! "
"You can only forecast from the next point after the last one in the training dataset: "
f"last train timestamp: {self._last_train_timestamp}, first prediction timestamp is {first_prediction_timestamp}"
)
Expand Down
50 changes: 49 additions & 1 deletion etna/models/prophet.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from copy import deepcopy
from datetime import datetime
from typing import Dict
Expand Down Expand Up @@ -98,6 +99,47 @@ def _create_model(self) -> "Prophet":

return model

def _check_not_used_columns(self, df: pd.DataFrame):
    """Warn about columns of ``df`` that are not used.

    A column counts as used when it is ``"target"``, ``"timestamp"`` or one of
    ``self.regressor_columns``. All remaining columns are reported, in their ``df`` order,
    in a single warning.

    Parameters
    ----------
    df:
        Data to inspect

    Raises
    ------
    ValueError:
        If ``self.regressor_columns`` is None
    """
    if self.regressor_columns is None:
        raise ValueError("Something went wrong, regressor_columns is None!")

    # Build the lookup once instead of concatenating a fresh list for every column checked.
    used_columns = {"target", "timestamp", *self.regressor_columns}
    columns_not_used = [col for col in df.columns if col not in used_columns]
    if columns_not_used:
        warnings.warn(
            message="This model doesn't work with exogenous features unknown in future. "
            f"Columns {columns_not_used} won't be used."
        )

def _select_regressors(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Select data with regressors.

    During fit there can't be regressors with NaNs, they are removed at higher level.
    Look at the issue: https://github.com/tinkoff-ai/etna/issues/557

    During prediction without validation NaNs in regressors lead to exception from the underlying model.

    This model requires data to be in numeric dtype.

    Parameters
    ----------
    df:
        Data that contains every column listed in ``self.regressor_columns``

    Returns
    -------
    :
        Regressor columns converted with ``pd.to_numeric``, or None if there are no regressors

    Raises
    ------
    ValueError:
        If ``self.regressor_columns`` is None
    ValueError:
        If some regressor contains NaN values
    ValueError:
        If some regressor can't be converted to numeric
    """
    if self.regressor_columns is None:
        raise ValueError("Something went wrong, regressor_columns is None!")

    regressors_with_nans = [regressor for regressor in self.regressor_columns if df[regressor].isna().any()]
    if regressors_with_nans:
        raise ValueError(
            f"Regressors {regressors_with_nans} contain NaN values. "
            "Try to lower horizon value, or drop these regressors."
        )

    if not self.regressor_columns:
        return None

    try:
        return df[self.regressor_columns].apply(pd.to_numeric)
    except ValueError as e:
        # Chain explicitly so the traceback marks the pandas error as the direct cause.
        raise ValueError(f"Only convertible to numeric features are allowed! Error: {str(e)}") from e

def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter":
"""
Fits a Prophet model.
Expand All @@ -110,6 +152,8 @@ def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter":
List of the columns with regressors
"""
self.regressor_columns = regressors
self._check_not_used_columns(df)

prophet_df = self._prepare_prophet_df(df=df)
for regressor in self.regressor_columns:
if regressor not in self.predefined_regressors_names:
Expand Down Expand Up @@ -159,7 +203,11 @@ def _prepare_prophet_df(self, df: pd.DataFrame) -> pd.DataFrame:
prophet_df = pd.DataFrame()
prophet_df["y"] = df["target"]
prophet_df["ds"] = df["timestamp"]
prophet_df[self.regressor_columns] = df[self.regressor_columns]

regressors_data = self._select_regressors(df)
if regressors_data is not None:
prophet_df[self.regressor_columns] = regressors_data[self.regressor_columns]

return prophet_df

@staticmethod
Expand Down
Loading

1 comment on commit 7e54706

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.