From 1e5192064b06a3dfa94cf4a4b154d2d79381756b Mon Sep 17 00:00:00 2001 From: Mr-Geekman <36005824+Mr-Geekman@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:24:43 +0300 Subject: [PATCH] Add `params_to_tune` for `SARIMAXModel` model (#1206) --- CHANGELOG.md | 1 + etna/models/sarimax.py | 49 ++++++++++++++++--- .../test_confidence_interval_outliers.py | 6 ++- tests/test_models/test_sarimax_model.py | 17 ++++++- 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 229cf2480..deaf753b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove version python-3.7 from `pyproject.toml`, update lock ([#1183](https://github.com/tinkoff-ai/etna/pull/1183)) - Add default `params_to_tune` for catboost models ([#1185](https://github.com/tinkoff-ai/etna/pull/1185)) - Add default `params_to_tune` for `ProphetModel` ([#1203](https://github.com/tinkoff-ai/etna/pull/1203)) +- Add default `params_to_tune` for `SARIMAXModel`, change default parameters for the model ([#1206](https://github.com/tinkoff-ai/etna/pull/1206)) - Add default `params_to_tune` for linear models ([#1204](https://github.com/tinkoff-ai/etna/pull/1204)) ### Fixed - Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110)) diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index 36461632c..7b2ff6a61 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -1,6 +1,7 @@ import warnings from abc import abstractmethod from datetime import datetime +from typing import Dict from typing import List from typing import Optional from typing import Sequence @@ -13,6 +14,7 @@ from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper from statsmodels.tsa.statespace.simulation_smoother import SimulationSmoother +from etna import SETTINGS from etna.libs.pmdarima_utils 
import seasonal_prediction_with_confidence from etna.models.base import BaseAdapter from etna.models.base import PredictionIntervalContextIgnorantAbstractModel @@ -20,6 +22,11 @@ from etna.models.mixins import PredictionIntervalContextIgnorantModelMixin from etna.models.utils import determine_num_steps +if SETTINGS.auto_required: + from optuna.distributions import BaseDistribution + from optuna.distributions import CategoricalDistribution + from optuna.distributions import IntUniformDistribution + warnings.filterwarnings( message="No frequency information was provided, so inferred frequency .* will be used", action="ignore", @@ -374,9 +381,9 @@ class _SARIMAXAdapter(_SARIMAXBaseAdapter): def __init__( self, - order: Tuple[int, int, int] = (2, 1, 0), - seasonal_order: Tuple[int, int, int, int] = (1, 1, 0, 12), - trend: Optional[str] = "c", + order: Tuple[int, int, int] = (1, 0, 0), + seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0), + trend: Optional[str] = None, measurement_error: bool = False, time_varying_regression: bool = False, mle_regression: bool = True, @@ -552,9 +559,9 @@ class SARIMAXModel( def __init__( self, - order: Tuple[int, int, int] = (2, 1, 0), - seasonal_order: Tuple[int, int, int, int] = (1, 1, 0, 12), - trend: Optional[str] = "c", + order: Tuple[int, int, int] = (1, 0, 0), + seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0), + trend: Optional[str] = None, measurement_error: bool = False, time_varying_regression: bool = False, mle_regression: bool = True, @@ -698,3 +705,33 @@ def __init__( **self.kwargs, ) ) + + def params_to_tune(self) -> Dict[str, "BaseDistribution"]: + """Get default grid for tuning hyperparameters. + + This grid doesn't tune the ``seasonal_order.s`` parameter, which determines the number of periods in a season. + This parameter is expected to be set by the user. + + Returns + ------- + : + Grid to tune. 
+ """ + num_periods = self.seasonal_order[3] + if num_periods == 0: + return { + "order.0": IntUniformDistribution(low=1, high=6, step=1), + "order.1": IntUniformDistribution(low=1, high=2, step=1), + "order.2": IntUniformDistribution(low=1, high=6, step=1), + "trend": CategoricalDistribution(["n", "c", "t", "ct"]), + } + else: + return { + "order.0": IntUniformDistribution(low=1, high=num_periods - 1, step=1), + "order.1": IntUniformDistribution(low=1, high=2, step=1), + "order.2": IntUniformDistribution(low=1, high=num_periods - 1, step=1), + "seasonal_order.0": IntUniformDistribution(low=0, high=2, step=1), + "seasonal_order.1": IntUniformDistribution(low=0, high=1, step=1), + "seasonal_order.2": IntUniformDistribution(low=0, high=1, step=1), + "trend": CategoricalDistribution(["n", "c", "t", "ct"]), + } diff --git a/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py b/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py index 050810ffb..1b8acda97 100644 --- a/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py +++ b/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py @@ -51,7 +51,11 @@ def test_get_anomalies_prediction_interval_interface(outliers_tsds, model, in_co 0.95, {"1": [np.datetime64("2021-01-11")], "2": [np.datetime64("2021-01-09"), np.datetime64("2021-01-27")]}, ), - (SARIMAXModel, 0.999, {"1": [], "2": [np.datetime64("2021-01-27")]}), + ( + SARIMAXModel, + 0.999, + {"1": [np.datetime64("2021-01-11")], "2": [np.datetime64("2021-01-09"), np.datetime64("2021-01-27")]}, + ), ), ) def test_get_anomalies_prediction_interval_values(outliers_tsds, model, interval_width, true_anomalies, in_column): diff --git a/tests/test_models/test_sarimax_model.py b/tests/test_models/test_sarimax_model.py index 90d14de27..5dc6de1b1 100644 --- a/tests/test_models/test_sarimax_model.py +++ b/tests/test_models/test_sarimax_model.py @@ -2,6 +2,7 @@ import numpy as np import pytest +from optuna.samplers 
import RandomSampler from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper from etna.models import SARIMAXModel @@ -169,7 +170,7 @@ def test_decomposition_hamiltonian_repr_error(dfs_w_exog, components_method_name ) @pytest.mark.parametrize("trend", (None, "t")) def test_components_names(dfs_w_exog, regressors, regressors_components, trend, components_method_name, in_sample): - expected_components = regressors_components + ["target_component_sarima"] + expected_components = regressors_components + ["target_component_arima"] train, test = dfs_w_exog pred_df = train if in_sample else test @@ -236,3 +237,17 @@ def test_components_sum_up_to_target( components = components_method(df=pred_df) np.testing.assert_allclose(np.sum(components.values, axis=1), np.squeeze(pred)) + + +@pytest.mark.parametrize( + "model", [SARIMAXModel(seasonal_order=(0, 0, 0, 0)), SARIMAXModel(seasonal_order=(0, 0, 0, 7))] +) +def test_params_to_tune(model): + grid = model.params_to_tune() + # we need a sampler to draw a value from the distribution + sampler = RandomSampler() + + assert len(grid) > 0 + for name, distribution in grid.items(): + value = sampler.sample_independent(study=None, trial=None, param_name=name, param_distribution=distribution) + _ = model.set_params(**{name: value})