From 8c38fd265d68d0e66df6cf0427c6321eca63e4da Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Wed, 5 Apr 2023 11:50:42 +0300 Subject: [PATCH 1/5] feature: add params_to_tune for sarimax model --- etna/models/sarimax.py | 28 +++++++++++++++++++++++++ tests/test_models/test_sarimax_model.py | 13 ++++++++++++ 2 files changed, 41 insertions(+) diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index 36461632c..829226641 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -1,6 +1,7 @@ import warnings from abc import abstractmethod from datetime import datetime +from typing import Dict from typing import List from typing import Optional from typing import Sequence @@ -13,6 +14,7 @@ from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper from statsmodels.tsa.statespace.simulation_smoother import SimulationSmoother +from etna import SETTINGS from etna.libs.pmdarima_utils import seasonal_prediction_with_confidence from etna.models.base import BaseAdapter from etna.models.base import PredictionIntervalContextIgnorantAbstractModel @@ -20,6 +22,11 @@ from etna.models.mixins import PredictionIntervalContextIgnorantModelMixin from etna.models.utils import determine_num_steps +if SETTINGS.auto_required: + from optuna.distributions import BaseDistribution + from optuna.distributions import CategoricalDistribution + from optuna.distributions import IntUniformDistribution + warnings.filterwarnings( message="No frequency information was provided, so inferred frequency .* will be used", action="ignore", @@ -698,3 +705,24 @@ def __init__( **self.kwargs, ) ) + + def params_to_tune(self) -> Dict[str, "BaseDistribution"]: + """Get default grid for tuning hyperparameters. + + This grid doesn't tune ``seasonal_order.s`` parameter that determines number of periods in a season. + This parameter is expected to be set by the user. + + Returns + ------- + : + Grid to tune. + """ + return { + "order.0": IntUniformDistribution(low=1, high=6, step=1), + "order.1": IntUniformDistribution(low=1, high=3, step=1), + "order.2": IntUniformDistribution(low=1, high=6, step=1), + "seasonal_order.0": IntUniformDistribution(low=0, high=2, step=1), + "seasonal_order.1": IntUniformDistribution(low=0, high=1, step=1), + "seasonal_order.2": IntUniformDistribution(low=0, high=1, step=1), + "trend": CategoricalDistribution(["n", "c", "t", "ct"]), + } diff --git a/tests/test_models/test_sarimax_model.py b/tests/test_models/test_sarimax_model.py index 90d14de27..4db5d15e8 100644 --- a/tests/test_models/test_sarimax_model.py +++ b/tests/test_models/test_sarimax_model.py @@ -2,6 +2,7 @@ import numpy as np import pytest +from optuna.samplers import RandomSampler from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper from etna.models import SARIMAXModel @@ -236,3 +237,15 @@ def test_components_sum_up_to_target( components = components_method(df=pred_df) np.testing.assert_allclose(np.sum(components.values, axis=1), np.squeeze(pred)) + + +def test_params_to_tune(): + model = SARIMAXModel() + grid = model.params_to_tune() + # we need sampler to get a value from distribution + sampler = RandomSampler() + + assert len(grid) > 0 + for name, distribution in grid.items(): + value = sampler.sample_independent(study=None, trial=None, param_name=name, param_distribution=distribution) + _ = model.set_params(**{name: value}) From 4e4b80a3ae391a4c59a09e7d0a8cdb189d889787 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Wed, 5 Apr 2023 14:28:06 +0300 Subject: [PATCH 2/5] feature: add params_to_tune for sarimax model --- etna/models/sarimax.py | 39 +++++++++++++++---------- tests/test_models/test_sarimax_model.py | 6 ++-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index 829226641..7b2ff6a61 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -381,9 +381,9 @@ class _SARIMAXAdapter(_SARIMAXBaseAdapter): def __init__( self, - order: Tuple[int, int, int] = (2, 1, 0), - seasonal_order: Tuple[int, int, int, int] = (1, 1, 0, 12), - trend: Optional[str] = "c", + order: Tuple[int, int, int] = (1, 0, 0), + seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0), + trend: Optional[str] = None, measurement_error: bool = False, time_varying_regression: bool = False, mle_regression: bool = True, @@ -559,9 +559,9 @@ class SARIMAXModel( def __init__( self, - order: Tuple[int, int, int] = (2, 1, 0), - seasonal_order: Tuple[int, int, int, int] = (1, 1, 0, 12), - trend: Optional[str] = "c", + order: Tuple[int, int, int] = (1, 0, 0), + seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0), + trend: Optional[str] = None, measurement_error: bool = False, time_varying_regression: bool = False, mle_regression: bool = True, @@ -717,12 +717,21 @@ def params_to_tune(self) -> Dict[str, "BaseDistribution"]: : Grid to tune. """ - return { - "order.0": IntUniformDistribution(low=1, high=6, step=1), - "order.1": IntUniformDistribution(low=1, high=3, step=1), - "order.2": IntUniformDistribution(low=1, high=6, step=1), - "seasonal_order.0": IntUniformDistribution(low=0, high=2, step=1), - "seasonal_order.1": IntUniformDistribution(low=0, high=1, step=1), - "seasonal_order.2": IntUniformDistribution(low=0, high=1, step=1), - "trend": CategoricalDistribution(["n", "c", "t", "ct"]), - } + num_periods = self.seasonal_order[3] + if num_periods == 0: + return { + "order.0": IntUniformDistribution(low=1, high=6, step=1), + "order.1": IntUniformDistribution(low=1, high=2, step=1), + "order.2": IntUniformDistribution(low=1, high=6, step=1), + "trend": CategoricalDistribution(["n", "c", "t", "ct"]), + } + else: + return { + "order.0": IntUniformDistribution(low=1, high=num_periods - 1, step=1), + "order.1": IntUniformDistribution(low=1, high=2, step=1), + "order.2": IntUniformDistribution(low=1, high=num_periods - 1, step=1), + "seasonal_order.0": IntUniformDistribution(low=0, high=2, step=1), + "seasonal_order.1": IntUniformDistribution(low=0, high=1, step=1), + "seasonal_order.2": IntUniformDistribution(low=0, high=1, step=1), + "trend": CategoricalDistribution(["n", "c", "t", "ct"]), + } diff --git a/tests/test_models/test_sarimax_model.py b/tests/test_models/test_sarimax_model.py index 4db5d15e8..5c3f27653 100644 --- a/tests/test_models/test_sarimax_model.py +++ b/tests/test_models/test_sarimax_model.py @@ -239,8 +239,10 @@ def test_components_sum_up_to_target( np.testing.assert_allclose(np.sum(components.values, axis=1), np.squeeze(pred)) -def test_params_to_tune(): - model = SARIMAXModel() +@pytest.mark.parametrize( + "model", [SARIMAXModel(seasonal_order=(0, 0, 0, 0)), SARIMAXModel(seasonal_order=(0, 0, 0, 7))] +) +def test_params_to_tune(model): grid = model.params_to_tune() # we need sampler to get a value from distribution sampler = RandomSampler() From ca5e6a2b2c49ede85f88d39f85665cd41fb3aa4f Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Wed, 5 Apr 2023 14:29:45 +0300 Subject: [PATCH 3/5] chore: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13a505372..7e2b92309 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove version python-3.7 from `pyproject.toml`, update lock ([#1183](https://github.com/tinkoff-ai/etna/pull/1183)) - Add default `params_to_tune` for catboost models ([#1185](https://github.com/tinkoff-ai/etna/pull/1185)) - Add default `params_to_tune` for `ProphetModel` ([#1203](https://github.com/tinkoff-ai/etna/pull/1203)) +- Add default `params_to_tune` for `SARIMAXModel`, change default parameters for the model ([#1206](https://github.com/tinkoff-ai/etna/pull/1206)) ### Fixed - Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110)) - `ProphetModel` fails with additional seasonality set ([#1157](https://github.com/tinkoff-ai/etna/pull/1157)) From ebe69df49d1b79800714f211e835bb7fb8c27ebf Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Wed, 5 Apr 2023 15:43:08 +0300 Subject: [PATCH 4/5] test: fix tests after changing default parameters for SARIMAXModel --- .../test_outliers/test_confidence_interval_outliers.py | 7 ++++++- tests/test_models/test_sarimax_model.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py b/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py index 050810ffb..bbddf2417 100644 --- a/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py +++ b/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py @@ -51,7 +51,12 @@ def test_get_anomalies_prediction_interval_interface(outliers_tsds, model, in_co 0.95, {"1": [np.datetime64("2021-01-11")], "2": [np.datetime64("2021-01-09"), np.datetime64("2021-01-27")]}, ), - (SARIMAXModel, 0.999, {"1": [], "2": [np.datetime64("2021-01-27")]}), + ( + SARIMAXModel, + {}, + 0.999, + {"1": [np.datetime64("2021-01-11")], "2": [np.datetime64("2021-01-09"), np.datetime64("2021-01-27")]}, + ), ), ) def test_get_anomalies_prediction_interval_values(outliers_tsds, model, interval_width, true_anomalies, in_column): diff --git a/tests/test_models/test_sarimax_model.py b/tests/test_models/test_sarimax_model.py index 5c3f27653..5dc6de1b1 100644 --- a/tests/test_models/test_sarimax_model.py +++ b/tests/test_models/test_sarimax_model.py @@ -170,7 +170,7 @@ def test_decomposition_hamiltonian_repr_error(dfs_w_exog, components_method_name ) @pytest.mark.parametrize("trend", (None, "t")) def test_components_names(dfs_w_exog, regressors, regressors_components, trend, components_method_name, in_sample): - expected_components = regressors_components + ["target_component_sarima"] + expected_components = regressors_components + ["target_component_arima"] train, test = dfs_w_exog pred_df = train if in_sample else test From b7e2e26e291b9d61a1fb7d5f456d1093f5adbbcc Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Wed, 5 Apr 2023 17:21:14 +0300 Subject: [PATCH 5/5] test: fix broken formatting --- .../test_outliers/test_confidence_interval_outliers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py b/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py index bbddf2417..1b8acda97 100644 --- a/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py +++ b/tests/test_analysis/test_outliers/test_confidence_interval_outliers.py @@ -53,7 +53,6 @@ def test_get_anomalies_prediction_interval_interface(outliers_tsds, model, in_co ), ( SARIMAXModel, - {}, 0.999, {"1": [np.datetime64("2021-01-11")], "2": [np.datetime64("2021-01-09"), np.datetime64("2021-01-27")]}, ),