Skip to content

Add TrendTransform #139

Merged
merged 14 commits into from
Oct 7, 2021
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Clustering (#[110](https://github.com/tinkoff-ai/etna-ts/pull/110))
- Outliers notebook (#[123](https://github.com/tinkoff-ai/etna-ts/pull/123))
- Method inverse_transform in TimeSeriesImputerTransform ([#135](https://github.com/tinkoff-ai/etna-ts/pull/135))
- TrendTransform ([#139](https://github.com/tinkoff-ai/etna-ts/pull/139))


### Changed
- SklearnTransform out column names ([#99](https://github.com/tinkoff-ai/etna-ts/pull/99))
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@
from etna.transforms.statistics import MinTransform
from etna.transforms.statistics import QuantileTransform
from etna.transforms.statistics import StdTransform
from etna.transforms.trend import TrendTransform
182 changes: 182 additions & 0 deletions etna/transforms/trend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
from typing import Dict
from typing import List
from typing import Optional

import pandas as pd
from ruptures.base import BaseCost
from sklearn.linear_model import LinearRegression

from etna.transforms.base import PerSegmentWrapper
from etna.transforms.binseg import _Binseg
from etna.transforms.change_points_trend import BaseEstimator
from etna.transforms.change_points_trend import TDetrendModel
from etna.transforms.change_points_trend import TTimestampInterval
from etna.transforms.change_points_trend import _OneSegmentChangePointsTrendTransform


class _OneSegmentTrendTransform(_OneSegmentChangePointsTrendTransform):
    """Single-segment transform that exposes the estimated trend as a feature.

    The trend is stored in the column 'regressor_<in_column>_trend'; the input column itself is not rewritten.
    """

    def __init__(
        self,
        in_column: str,
        change_point_model: BaseEstimator,
        detrend_model: TDetrendModel,
        **change_point_model_predict_params,
    ):
        """Create _OneSegmentTrendTransform.

        Parameters
        ----------
        in_column:
            name of the column the trend is estimated from
        change_point_model:
            model used to find trend change points
        detrend_model:
            model used to get the trend from data
        change_point_model_predict_params:
            params for the predict method of change_point_model
        """
        # Column names: where the data is read from and where the trend is written to.
        self.in_column = in_column
        self.out_column = "regressor_" + in_column + "_trend"
        # Models and the keyword params passed along with them.
        self.change_point_model = change_point_model
        self.detrend_model = detrend_model
        # State that starts out empty.
        self.per_interval_models: Optional[Dict[TTimestampInterval, TDetrendModel]] = None
        self.intervals: Optional[List[TTimestampInterval]] = None
        self.change_point_model_predict_params = change_point_model_predict_params
        super().__init__(
            in_column=in_column,
            change_point_model=change_point_model,
            detrend_model=detrend_model,
            **change_point_model_predict_params,
        )

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Write the trend into the column 'regressor_<in_column>_trend' of the given dataframe.

        The trend is requested for the rows starting at the first valid value of in_column.
        The dataframe is modified in place and the same object is returned.

        Parameters
        ----------
        df:
            data to get trend from

        Returns
        -------
        pd.DataFrame:
            df with trend column
        """
        # Clear pandas' copy marker so that adding a column to a slice does not warn.
        df._is_copy = False
        start = df[self.in_column].first_valid_index()
        observed = df.loc[start:, self.in_column]
        df[self.out_column] = self._predict_per_interval_model(series=observed)
        return df

    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return the dataframe unchanged.

        Parameters
        ----------
        df:
            one segment dataframe

        Returns
        -------
        pd.DataFrame:
            given dataframe
        """
        return df


class _TrendTransform(PerSegmentWrapper):
    """PerSegmentWrapper around _OneSegmentTrendTransform.

    Adds trend as a feature in the column 'regressor_<in_column>_trend'.
    """

    def __init__(
        self,
        in_column: str,
        change_point_model: BaseEstimator,
        detrend_model: TDetrendModel,
        **change_point_model_predict_params,
    ):
        """Create _TrendTransform.

        Parameters
        ----------
        in_column:
            name of the column the trend is estimated from
        change_point_model:
            model used to find trend change points
        detrend_model:
            model used to get the trend in data
        change_point_model_predict_params:
            params for the predict method of change_point_model
        """
        self.in_column = in_column
        self.change_point_model = change_point_model
        self.detrend_model = detrend_model
        self.change_point_model_predict_params = change_point_model_predict_params
        # The wrapped one-segment transform receives exactly the arguments given here.
        one_segment_transform = _OneSegmentTrendTransform(
            in_column=in_column,
            change_point_model=change_point_model,
            detrend_model=detrend_model,
            **change_point_model_predict_params,
        )
        super().__init__(transform=one_segment_transform)


class TrendTransform(_TrendTransform):
    """TrendTransform adds trend as a feature. Creates column 'regressor_<in_column>_trend'.

    TrendTransform uses _Binseg model as a change point detection model in _TrendTransform.
    """

    def __init__(
        self,
        in_column: str,
        detrend_model: Optional[TDetrendModel] = None,
        model: str = "ar",
        custom_cost: Optional[BaseCost] = None,
        min_size: int = 2,
        jump: int = 1,
        n_bkps: int = 5,
        pen: Optional[float] = None,
        epsilon: Optional[float] = None,
    ):
        """Init TrendTransform.

        Parameters
        ----------
        in_column:
            name of column to apply transform to
        detrend_model:
            model to get trend in data; if None, a new LinearRegression() is created for this instance
        model:
            binseg segment model, ["l1", "l2", "rbf",...]. Not used if 'custom_cost' is not None.
        custom_cost:
            binseg custom cost function
        min_size:
            minimum segment length necessary to decide it is a stable trend segment
        jump:
            jump value can speed up computations: if jump==k, the algo will use every k-th value for change points search.
        n_bkps:
            number of change points to find
        pen:
            penalty value (>0)
        epsilon:
            reconstruction budget (>0)
        """
        if detrend_model is None:
            # Build the default here, not in the signature: a default expression is evaluated once at
            # definition time, so one estimator object would be shared by every TrendTransform.
            detrend_model = LinearRegression()
        self.model = model
        self.custom_cost = custom_cost
        self.min_size = min_size
        self.jump = jump
        self.n_bkps = n_bkps
        self.pen = pen
        self.epsilon = epsilon
        super().__init__(
            in_column=in_column,
            change_point_model=_Binseg(
                model=self.model, custom_cost=self.custom_cost, min_size=self.min_size, jump=self.jump
            ),
            detrend_model=detrend_model,
            # n_bkps, pen and epsilon are passed on as change_point_model_predict_params.
            n_bkps=self.n_bkps,
            pen=self.pen,
            epsilon=self.epsilon,
        )
85 changes: 85 additions & 0 deletions tests/test_transforms/test_trend_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from copy import deepcopy

import pandas as pd
import pytest
from sklearn.linear_model import LinearRegression

from etna.datasets.tsdataset import TSDataset
from etna.transforms.binseg import _Binseg
from etna.transforms.trend import TrendTransform
from etna.transforms.trend import _OneSegmentTrendTransform
from etna.transforms.trend import _TrendTransform

# Segment name used to cut a single-segment frame out of the example data for the one-segment tests.
DEFAULT_SEGMENT = "segment_1"


@pytest.fixture
def df_one_segment(example_df) -> pd.DataFrame:
    """Rows of example_df that belong to DEFAULT_SEGMENT, indexed by the 'timestamp' column."""
    in_default_segment = example_df["segment"] == DEFAULT_SEGMENT
    one_segment = example_df[in_default_segment]
    return one_segment.set_index("timestamp")


def test_fit_transform_one_segment(df_one_segment: pd.DataFrame) -> None:
    """
    Test that fit_transform interface works correctly for one segment.

    Checks that only the trend column is added, that the target is left untouched
    and that the mean deviation of the trend from the target is small.
    """
    df_one_segment_original = df_one_segment.copy()
    trend_transform = _OneSegmentTrendTransform(
        in_column="target", change_point_model=_Binseg(), detrend_model=LinearRegression(), n_bkps=5
    )
    df_one_segment = trend_transform.fit_transform(df_one_segment)
    assert sorted(df_one_segment.columns) == sorted(["target", "segment", "regressor_target_trend"])
    assert (df_one_segment["target"] == df_one_segment_original["target"]).all()
    residue = df_one_segment["target"] - df_one_segment["regressor_target_trend"]
    # Bound the magnitude: a one-sided check would accept a trend that overshoots the target arbitrarily.
    assert abs(residue.mean()) < 1


def test_inverse_transform_one_segment(df_one_segment: pd.DataFrame) -> None:
    """
    Test that inverse_transform interface works correctly for one segment.

    inverse_transform is expected to leave the transformed frame as it is.
    """
    trend_transform = _OneSegmentTrendTransform(
        in_column="target", change_point_model=_Binseg(), detrend_model=LinearRegression(), n_bkps=5
    )
    df_one_segment_transformed = trend_transform.fit_transform(df_one_segment)
    # Snapshot before the inverse call: transform hands back the frame it modified and
    # inverse_transform returns its argument, so comparing the two results directly
    # could end up comparing a frame with itself.
    expected = df_one_segment_transformed.copy()
    df_one_segment_inverse_transformed = trend_transform.inverse_transform(df_one_segment_transformed)
    # assert_frame_equal treats NaN in matching positions as equal, unlike elementwise ==.
    pd.testing.assert_frame_equal(df_one_segment_inverse_transformed, expected)


def test_fit_transform_many_segments(example_tsds: TSDataset) -> None:
    """
    Test that fit_transform interface works correctly for many segment.

    For every segment checks that only the trend column is added, that the target is
    left untouched and that the mean deviation of the trend from the target is small.
    """
    example_tsds_original = deepcopy(example_tsds)
    trend_transform = _TrendTransform(
        in_column="target", change_point_model=_Binseg(), detrend_model=LinearRegression(), n_bkps=5
    )
    example_tsds.fit_transform([trend_transform])
    for segment in example_tsds.segments:
        segment_slice = example_tsds[:, segment, :][segment]
        segment_slice_original = example_tsds_original[:, segment, :][segment]
        assert sorted(segment_slice.columns) == sorted(["target", "regressor_target_trend"])
        assert (segment_slice["target"] == segment_slice_original["target"]).all()
        residue = segment_slice_original["target"] - segment_slice["regressor_target_trend"]
        # Bound the magnitude: a one-sided check would accept a trend that overshoots the target arbitrarily.
        assert abs(residue.mean()) < 1


def test_inverse_transform_many_segments(example_tsds: TSDataset) -> None:
    """
    Check the inverse_transform interface of _TrendTransform on a dataset with many segments.
    """
    transform = _TrendTransform(
        in_column="target", change_point_model=_Binseg(), detrend_model=LinearRegression(), n_bkps=5
    )
    after_fit_transform = example_tsds.fit_transform([transform])
    after_inverse_transform = example_tsds.inverse_transform()
    # NOTE(review): both values are return values of methods called on the same dataset object;
    # if those methods return None or the dataset itself this check is trivially true - confirm.
    assert after_fit_transform == after_inverse_transform


def test_transform_run(example_tsds: TSDataset) -> None:
    """
    Run the public TrendTransform through fit_transform and inverse_transform of the dataset.
    """
    transform = TrendTransform(in_column="target", detrend_model=LinearRegression(), model="rbf")
    after_fit_transform = example_tsds.fit_transform([transform])
    after_inverse_transform = example_tsds.inverse_transform()
    # NOTE(review): both values are return values of methods called on the same dataset object;
    # if those methods return None or the dataset itself this check is trivially true - confirm.
    assert after_fit_transform == after_inverse_transform