Skip to content

Add default params_to_tune for DateFlagsTransform, TimeFlagsTransform, SpecialDaysTransform, FourierTransform #1228

Merged
merged 8 commits into from
Apr 18, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add default `params_to_tune` for `DeepARModel` and `TFTModel` ([#1210](https://github.com/tinkoff-ai/etna/pull/1210))
- Add default `params_to_tune` for `HoltWintersModel`, `HoltModel` and `SimpleExpSmoothingModel` ([#1209](https://github.com/tinkoff-ai/etna/pull/1209))
- Add default `params_to_tune` for `RNNModel` and `MLPModel` ([#1218](https://github.com/tinkoff-ai/etna/pull/1218))
- Add default `params_to_tune` for `DateFlagsTransform`, `TimeFlagsTransform`, `SpecialDaysTransform` and `FourierTransform` ([#1228](https://github.com/tinkoff-ai/etna/pull/1228))
### Fixed
- Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
- `ProphetModel` fails with additional seasonality set ([#1157](https://github.com/tinkoff-ai/etna/pull/1157))
Expand Down
28 changes: 28 additions & 0 deletions etna/transforms/timestamp/date_flags.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
from copy import deepcopy
from math import ceil
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence

import numpy as np
import pandas as pd

from etna import SETTINGS
from etna.transforms.base import FutureMixin
from etna.transforms.base import IrreversibleTransform

if SETTINGS.auto_required:
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution


class DateFlagsTransform(IrreversibleTransform, FutureMixin):
"""DateFlagsTransform is a class that implements extraction of the main date-based features from datetime column.
Expand Down Expand Up @@ -345,5 +351,27 @@ def _get_weekends(timestamp_series: pd.Series) -> np.ndarray:
weekend_days = (5, 6)
return timestamp_series.apply(lambda x: x.weekday() in weekend_days).values

def params_to_tune(self) -> Dict[str, "BaseDistribution"]:
    """Get default grid for tuning hyperparameters.

    Every date flag is sampled independently from ``[False, True]``, so the grid
    does not exclude the combination in which all flags are ``False``.

    Returns
    -------
    :
        Grid to tune.
    """
    flag_names = (
        "day_number_in_week",
        "day_number_in_month",
        "day_number_in_year",
        "week_number_in_month",
        "week_number_in_year",
        "month_number_in_year",
        "season_number",
        "year_number",
        "is_weekend",
    )
    # A separate distribution object is created for every flag.
    return {flag_name: CategoricalDistribution([False, True]) for flag_name in flag_names}


__all__ = ["DateFlagsTransform"]
39 changes: 33 additions & 6 deletions etna/transforms/timestamp/fourier.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import math
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence

import numpy as np
import pandas as pd

from etna import SETTINGS
from etna.transforms.base import FutureMixin
from etna.transforms.base import IrreversibleTransform

if SETTINGS.auto_required:
from optuna.distributions import BaseDistribution
from optuna.distributions import IntLogUniformDistribution


class FourierTransform(IrreversibleTransform, FutureMixin):
"""Adds fourier features to the dataset.
Expand Down Expand Up @@ -72,19 +78,22 @@ def __init__(
if period < 2:
raise ValueError("Period should be at least 2")
self.period = period
self.mods: Sequence[int]

self.order = order
self.mods = mods
self._mods: Sequence[int]

if order is not None and mods is None:
if order < 1 or order > math.ceil(period / 2):
raise ValueError("Order should be within [1, ceil(period/2)] range")
self.mods = [mod for mod in range(1, 2 * order + 1) if mod < period]
self._mods = [mod for mod in range(1, 2 * order + 1) if mod < period]
elif mods is not None and order is None:
if min(mods) < 1 or max(mods) >= period:
raise ValueError("Every mod should be within [1, int(period)) range")
self.mods = mods
self._mods = mods
else:
raise ValueError("There should be exactly one option set: order or mods")
self.order = None

self.out_column = out_column
super().__init__(required_features=["target"])

Expand All @@ -96,7 +105,7 @@ def _get_column_name(self, mod: int) -> str:

def get_regressors_info(self) -> List[str]:
"""Return the list with regressors created by the transform."""
output_columns = [self._get_column_name(mod=mod) for mod in self.mods]
output_columns = [self._get_column_name(mod=mod) for mod in self._mods]
return output_columns

def _fit(self, df: pd.DataFrame) -> "FourierTransform":
Expand Down Expand Up @@ -143,10 +152,28 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
features = pd.DataFrame(index=df.index)
elapsed = np.arange(features.shape[0]) / self.period

for mod in self.mods:
for mod in self._mods:
order = (mod + 1) // 2
is_cos = mod % 2 == 0

features[self._get_column_name(mod)] = np.sin(2 * np.pi * order * elapsed + np.pi / 2 * is_cos)

return self._construct_answer(df, features)

def params_to_tune(self) -> Dict[str, "BaseDistribution"]:
    """Get default grid for tuning hyperparameters.

    * When ``mods`` is set, nothing is tuned and the grid is empty.

    * Otherwise only ``order`` is tuned, on a log-uniform integer scale from 1 to
      ``ceil(period / 2)``. The ``period`` itself is never tuned; it is expected
      to be set by the user.

    Returns
    -------
    :
        Grid to tune.
    """
    if self.mods is None:
        # Same upper bound that the constructor accepts for ``order``.
        order_upper_bound = math.ceil(self.period / 2)
        return {"order": IntLogUniformDistribution(low=1, high=order_upper_bound)}

    return {}
20 changes: 20 additions & 0 deletions etna/transforms/timestamp/special_days.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@

import pandas as pd

from etna import SETTINGS
from etna.transforms.base import FutureMixin
from etna.transforms.base import IrreversiblePerSegmentWrapper
from etna.transforms.base import OneSegmentTransform

if SETTINGS.auto_required:
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution


def calc_day_number_in_week(datetime_day: datetime.datetime) -> int:
    """Return the weekday index of ``datetime_day`` as given by ``datetime.weekday()`` (Monday is 0)."""
    weekday_index = datetime_day.weekday()
    return weekday_index
Expand Down Expand Up @@ -216,5 +221,20 @@ def get_regressors_info(self) -> List[str]:
output_columns.append("anomaly_monthdays")
return output_columns

def params_to_tune(self) -> Dict[str, "BaseDistribution"]:
    """Get default grid for tuning hyperparameters.

    Both flags are sampled independently from ``[False, True]``, so the grid
    does not exclude the combination in which both are ``False``.

    Returns
    -------
    :
        Grid to tune.
    """
    flag_names = ("find_special_weekday", "find_special_month_day")
    # A separate distribution object is created for every flag.
    return {flag_name: CategoricalDistribution([False, True]) for flag_name in flag_names}


__all__ = ["SpecialDaysTransform"]
25 changes: 25 additions & 0 deletions etna/transforms/timestamp/time_flags.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from copy import deepcopy
from typing import Dict
from typing import List
from typing import Optional

import numpy as np
import pandas as pd

from etna import SETTINGS
from etna.transforms.base import FutureMixin
from etna.transforms.base import IrreversibleTransform

if SETTINGS.auto_required:
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution


class TimeFlagsTransform(IrreversibleTransform, FutureMixin):
"""TimeFlagsTransform is a class that implements extraction of the main time-based features from datetime column."""
Expand Down Expand Up @@ -202,5 +208,24 @@ def _get_period_in_day(timestamp_series: pd.Series, period_in_hours: int = 12) -
"""
return timestamp_series.apply(lambda x: x.hour // period_in_hours).values

def params_to_tune(self) -> Dict[str, "BaseDistribution"]:
    """Get default grid for tuning hyperparameters.

    Every time flag is sampled independently from ``[False, True]``, so the grid
    does not exclude the combination in which all flags are ``False``.

    Returns
    -------
    :
        Grid to tune.
    """
    flag_names = (
        "minute_in_hour_number",
        "fifteen_minutes_in_hour_number",
        "hour_number",
        "half_hour_number",
        "half_day_number",
        "one_third_day_number",
    )
    # A separate distribution object is created for every flag.
    return {flag_name: CategoricalDistribution([False, True]) for flag_name in flag_names}


__all__ = ["TimeFlagsTransform"]
10 changes: 10 additions & 0 deletions tests/test_pipeline/test_autoregressive_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
import pytest
from optuna.distributions import CategoricalDistribution
from optuna.distributions import IntUniformDistribution
from optuna.distributions import LogUniformDistribution

Expand Down Expand Up @@ -394,6 +395,15 @@ def test_predict_return_components(example_tsds, model_fixture, request):
"model.depth": IntUniformDistribution(low=1, high=11, step=1),
"model.l2_leaf_reg": LogUniformDistribution(low=0.1, high=200.0),
"model.random_strength": LogUniformDistribution(low=1e-05, high=10.0),
"transforms.0.day_number_in_week": CategoricalDistribution([False, True]),
"transforms.0.day_number_in_month": CategoricalDistribution([False, True]),
"transforms.0.day_number_in_year": CategoricalDistribution([False, True]),
"transforms.0.week_number_in_month": CategoricalDistribution([False, True]),
"transforms.0.week_number_in_year": CategoricalDistribution([False, True]),
"transforms.0.month_number_in_year": CategoricalDistribution([False, True]),
"transforms.0.season_number": CategoricalDistribution([False, True]),
"transforms.0.year_number": CategoricalDistribution([False, True]),
"transforms.0.is_weekend": CategoricalDistribution([False, True]),
},
),
],
Expand Down
45 changes: 45 additions & 0 deletions tests/test_pipeline/test_hierarchical_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import numpy as np
import pandas as pd
import pytest
from optuna.distributions import CategoricalDistribution
from optuna.distributions import IntUniformDistribution
from optuna.distributions import LogUniformDistribution

from etna.datasets.utils import match_target_quantiles
from etna.metrics import MAE
Expand Down Expand Up @@ -441,3 +444,45 @@ def test_raw_forecast_with_return_components(product_level_constant_hierarchical
)
pipeline.fit(product_level_constant_hierarchical_ts)
pipeline.raw_forecast(ts=product_level_constant_hierarchical_ts, return_components=True)


@pytest.mark.parametrize(
    "reconciliator",
    (
        TopDownReconciliator(target_level="product", source_level="market", period=1, method="AHP"),
        TopDownReconciliator(target_level="product", source_level="market", period=1, method="PHA"),
        BottomUpReconciliator(target_level="market", source_level="product"),
        BottomUpReconciliator(target_level="total", source_level="market"),
    ),
)
@pytest.mark.parametrize(
    "model, transforms, expected_params_to_tune",
    [
        (
            CatBoostMultiSegmentModel(iterations=100),
            [DateFlagsTransform(), LagTransform(in_column="target", lags=list(range(3, 10)))],
            {
                "model.learning_rate": LogUniformDistribution(low=1e-4, high=0.5),
                "model.depth": IntUniformDistribution(low=1, high=11, step=1),
                "model.l2_leaf_reg": LogUniformDistribution(low=0.1, high=200.0),
                "model.random_strength": LogUniformDistribution(low=1e-05, high=10.0),
                "transforms.0.day_number_in_week": CategoricalDistribution([False, True]),
                "transforms.0.day_number_in_month": CategoricalDistribution([False, True]),
                "transforms.0.day_number_in_year": CategoricalDistribution([False, True]),
                "transforms.0.week_number_in_month": CategoricalDistribution([False, True]),
                "transforms.0.week_number_in_year": CategoricalDistribution([False, True]),
                "transforms.0.month_number_in_year": CategoricalDistribution([False, True]),
                "transforms.0.season_number": CategoricalDistribution([False, True]),
                "transforms.0.year_number": CategoricalDistribution([False, True]),
                "transforms.0.is_weekend": CategoricalDistribution([False, True]),
            },
        ),
    ],
)
def test_params_to_tune(reconciliator, model, transforms, expected_params_to_tune):
    """Check that the grid returned by a hierarchical pipeline equals the expected one for each reconciliator."""
    pipeline = HierarchicalPipeline(reconciliator=reconciliator, model=model, transforms=transforms, horizon=1)

    assert pipeline.params_to_tune() == expected_params_to_tune
10 changes: 10 additions & 0 deletions tests/test_pipeline/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
import pytest
from optuna.distributions import CategoricalDistribution
from optuna.distributions import IntUniformDistribution
from optuna.distributions import LogUniformDistribution

Expand Down Expand Up @@ -1221,6 +1222,15 @@ def test_predict_return_components(
"model.depth": IntUniformDistribution(low=1, high=11, step=1),
"model.l2_leaf_reg": LogUniformDistribution(low=0.1, high=200.0),
"model.random_strength": LogUniformDistribution(low=1e-05, high=10.0),
"transforms.0.day_number_in_week": CategoricalDistribution([False, True]),
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
"transforms.0.day_number_in_month": CategoricalDistribution([False, True]),
"transforms.0.day_number_in_year": CategoricalDistribution([False, True]),
"transforms.0.week_number_in_month": CategoricalDistribution([False, True]),
"transforms.0.week_number_in_year": CategoricalDistribution([False, True]),
"transforms.0.month_number_in_year": CategoricalDistribution([False, True]),
"transforms.0.season_number": CategoricalDistribution([False, True]),
"transforms.0.year_number": CategoricalDistribution([False, True]),
"transforms.0.is_weekend": CategoricalDistribution([False, True]),
},
),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from etna.datasets import TSDataset
from etna.transforms.timestamp import DateFlagsTransform
from tests.test_transforms.utils import assert_sampling_is_valid
from tests.test_transforms.utils import assert_transformation_equals_loaded_original

WEEKEND_DAYS = (5, 6)
Expand Down Expand Up @@ -289,3 +290,10 @@ def test_save_load(train_ts):
ts = train_ts
transform = DateFlagsTransform()
assert_transformation_equals_loaded_original(transform=transform, ts=ts)


def test_params_to_tune(train_ts):
    """Check that the default grid is non-empty and passes the ``assert_sampling_is_valid`` check."""
    date_flags = DateFlagsTransform()
    default_grid = date_flags.params_to_tune()
    assert len(default_grid) > 0
    assert_sampling_is_valid(transform=date_flags, ts=train_ts)
23 changes: 20 additions & 3 deletions tests/test_transforms/test_timestamp/test_fourier_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from etna.metrics import R2
from etna.models import LinearPerSegmentModel
from etna.transforms.timestamp import FourierTransform
from tests.test_transforms.utils import assert_sampling_is_valid
from tests.test_transforms.utils import assert_transformation_equals_loaded_original


Expand Down Expand Up @@ -49,15 +50,15 @@ def ts_trend_seasonal(random_seed) -> TSDataset:
return TSDataset(TSDataset.to_dataset(classic_df), freq="D")


@pytest.mark.parametrize("order, mods, repr_mods", [(None, [1, 2, 3, 4], [1, 2, 3, 4]), (2, None, [1, 2, 3, 4])])
def test_repr(order, mods, repr_mods):
@pytest.mark.parametrize("order, mods", [(None, [1, 2, 3, 4]), (2, None)])
def test_repr(order, mods):
transform = FourierTransform(
period=10,
order=order,
mods=mods,
)
transform_repr = transform.__repr__()
true_repr = f"FourierTransform(period = 10, order = None, mods = {repr_mods}, out_column = None, )"
true_repr = f"FourierTransform(period = 10, order = {order}, mods = {mods}, out_column = None, )"
assert transform_repr == true_repr


Expand Down Expand Up @@ -166,3 +167,19 @@ def test_forecast(ts_trend_seasonal):
def test_save_load(ts_trend_seasonal):
transform = FourierTransform(period=7, order=3)
assert_transformation_equals_loaded_original(transform=transform, ts=ts_trend_seasonal)


@pytest.mark.parametrize(
    "transform, expected_length",
    [
        (FourierTransform(period=7, order=1), 1),
        (FourierTransform(period=7, mods=[1]), 0),
        (FourierTransform(period=7, mods=[1, 4]), 0),
        (FourierTransform(period=30.4, order=1), 1),
        (FourierTransform(period=365.25, order=1), 1),
    ],
)
def test_params_to_tune(transform, expected_length, ts_trend_seasonal):
    """Check the size of the default grid and that it passes the ``assert_sampling_is_valid`` check."""
    grid_size = len(transform.params_to_tune())
    assert grid_size == expected_length
    assert_sampling_is_valid(transform=transform, ts=ts_trend_seasonal)
Loading