From 985fedc194ff6ce348cb346a0a8a3aa36b6ee803 Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 8 Feb 2023 11:15:50 +0300 Subject: [PATCH 01/13] not to discard on checkout --- etna/auto/auto.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 1188a135e..01fa28542 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -37,6 +37,22 @@ def __call__(self, pipeline: Pipeline) -> None: ... +class AutoBase: + """Base Class for Auto""" + + def __init__(): + pass + + def top_k(k: int): + pass + + def summary(): + pass + + def objective(): + pass + + class Auto: """Automatic pipeline selection via defined or custom pipeline pool.""" From 9ebe0e034464b0d0b5a8b7cbaa6e9d1e0f370372 Mon Sep 17 00:00:00 2001 From: gooseit Date: Thu, 9 Feb 2023 03:10:32 +0300 Subject: [PATCH 02/13] save_changes_before_checkout --- etna/auto/auto.py | 259 +++++++++++++++++++++++++++++----------------- 1 file changed, 166 insertions(+), 93 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 01fa28542..8f3add4f6 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -26,6 +26,9 @@ from etna.metrics.utils import aggregate_metrics_df from etna.pipeline import Pipeline +from abc import ABC +from abc import abstractmethod + class _Callback(Protocol): def __call__(self, metrics_df: pd.DataFrame, forecast_df: pd.DataFrame, fold_info_df: pd.DataFrame) -> None: @@ -38,22 +41,176 @@ def __call__(self, pipeline: Pipeline) -> None: class AutoBase: - """Base Class for Auto""" + """Base Class for Auto and Tune, implementing core logic behind these classes""" + + def fit( + self, + ts: TSDataset, + timeout: Optional[int] = None, + n_trials: Optional[int] = None, + initializer: Optional[_Initializer] = None, + callback: Optional[_Callback] = None, + **optuna_kwargs, + ) -> Pipeline: + """ + Start automatic pipeline selection. + + Parameters + ---------- + ts: + tsdataset to fit on + timeout: + timeout for optuna. N.B. this is timeout for each worker + n_trials: + number of trials for optuna. N.B. this is number of trials for each worker + initializer: + is called before each pipeline backtest, can be used to initialize loggers + callback: + is called after each pipeline backtest, can be used to log extra metrics + optuna_kwargs: + additional kwargs for optuna :py:meth:`optuna.study.Study.optimize` + """ + if self._optuna is None: + self._optuna = self._init_optuna() + + self._optuna.tune( + objective=self.objective( + ts=ts, + target_metric=self.target_metric, + metric_aggregation=self.metric_aggregation, + metrics=self.metrics, + backtest_params=self.backtest_params, + initializer=initializer, + callback=callback, + ), + runner=self.runner, + n_trials=n_trials, + timeout=timeout, + **optuna_kwargs, + ) + + return get_from_params(**self._optuna.study.best_trial.user_attrs["pipeline"]) + + def summary(self) -> pd.DataFrame: + """Get Auto trials summary.""" + if self._optuna is None: + self._optuna = self._init_optuna() + + study = self._optuna.study.get_trials() + + study_params = [ + {**trial.user_attrs, "pipeline": get_from_params(**trial.user_attrs["pipeline"]), "state": trial.state} + for trial in study + ] + + return pd.DataFrame(study_params) + + def top_k(self, k: int = 5) -> List[Pipeline]: + """ + Get top k pipelines. + + Parameters + ---------- + k: + number of pipelines to return + """ + summary = self.summary() + df = summary.sort_values( + by=[f"{self.target_metric.name}_{self.metric_aggregation}"], + ascending=(not self.target_metric.greater_is_better), + ) + return [pipeline for pipeline in df["pipeline"].values[:k]] # noqa: C416 + + +class AutoAbstract(ABC): + """Interface for Auto object""" + + @abstractmethod + def fit( + self, + ts: TSDataset, + timeout: Optional[int] = None, + n_trials: Optional[int] = None, + initializer: Optional[_Initializer] = None, + callback: Optional[_Callback] = None, + **optuna_kwargs, + ) -> Pipeline: + """ + Start automatic pipeline selection. + + Parameters + ---------- + ts: + tsdataset to fit on + timeout: + timeout for optuna. N.B. this is timeout for each worker + n_trials: + number of trials for optuna. N.B. this is number of trials for each worker + initializer: + is called before each pipeline backtest, can be used to initialize loggers + callback: + is called after each pipeline backtest, can be used to log extra metrics + optuna_kwargs: + additional kwargs for optuna :py:meth:`optuna.study.Study.optimize` + """ + pass + + @abstractmethod + def _init_optuna(self): + """Initialize optuna.""" - def __init__(): + @abstractmethod + def summary(self) -> pd.DataFrame: + """Get Auto trials summary.""" pass - - def top_k(k: int): - pass - - def summary(): + + @abstractmethod + def top_k(self, k: int = 5) -> List[Pipeline]: + """ + Get top k pipelines. + + Parameters + ---------- + k: + number of pipelines to return + """ pass - def objective(): + @abstractmethod + @staticmethod + def objective( + ts: TSDataset, + target_metric: Metric, + metric_aggregation: MetricAggregationStatistics, + metrics: List[Metric], + backtest_params: dict, + initializer: Optional[_Initializer] = None, + callback: Optional[_Callback] = None, + ) -> Callable[[Trial], float]: + """ + Optuna objective wrapper. + + Parameters + ---------- + ts: + tsdataset to fit on + target_metric: + metric to optimize + metric_aggregation: + aggregation method for per-segment metrics + metrics: + list of metrics to compute + backtest_params: + custom parameters for backtest instead of default backtest parameters + initializer: + is called before each pipeline backtest, can be used to initialize loggers + callback: + is called after each pipeline backtest, can be used to log extra metrics + """ pass -class Auto: +class Auto(AutoBase, AutoAbstract): """Automatic pipeline selection via defined or custom pipeline pool.""" def __init__( @@ -175,87 +332,3 @@ def _init_optuna(self): ) return optuna - def summary(self) -> pd.DataFrame: - """Get Auto trials summary.""" - if self._optuna is None: - self._optuna = self._init_optuna() - - study = self._optuna.study.get_trials() - - study_params = [ - {**trial.user_attrs, "pipeline": get_from_params(**trial.user_attrs["pipeline"]), "state": trial.state} - for trial in study - ] - - return pd.DataFrame(study_params) - - def top_k(self, k: int = 5) -> List[Pipeline]: - """ - Get top k pipelines. - - Parameters - ---------- - k: - number of pipelines to return - """ - summary = self.summary() - df = summary.sort_values( - by=[f"{self.target_metric.name}_{self.metric_aggregation}"], - ascending=(not self.target_metric.greater_is_better), - ) - return [pipeline for pipeline in df["pipeline"].values[:k]] # noqa: C416 - - @staticmethod - def objective( - ts: TSDataset, - target_metric: Metric, - metric_aggregation: MetricAggregationStatistics, - metrics: List[Metric], - backtest_params: dict, - initializer: Optional[_Initializer] = None, - callback: Optional[_Callback] = None, - ) -> Callable[[Trial], float]: - """ - Optuna objective wrapper. - - Parameters - ---------- - ts: - tsdataset to fit on - target_metric: - metric to optimize - metric_aggregation: - aggregation method for per-segment metrics - metrics: - list of metrics to compute - backtest_params: - custom parameters for backtest instead of default backtest parameters - initializer: - is called before each pipeline backtest, can be used to initialize loggers - callback: - is called after each pipeline backtest, can be used to log extra metrics - """ - - def _objective(trial: Trial) -> float: - - pipeline_config = dict() - pipeline_config.update(trial.relative_params) - pipeline_config.update(trial.params) - - pipeline: Pipeline = get_from_params(**pipeline_config) - if initializer is not None: - initializer(pipeline=pipeline) - - metrics_df, forecast_df, fold_info_df = pipeline.backtest(ts, metrics=metrics, **backtest_params) - - if callback is not None: - callback(metrics_df=metrics_df, forecast_df=forecast_df, fold_info_df=fold_info_df) - - aggregated_metrics = aggregate_metrics_df(metrics_df) - - for metric in aggregated_metrics: - trial.set_user_attr(metric, aggregated_metrics[metric]) - - return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"] - - return _objective From 8f6562b57e729ba8d7df7d50fb3e7f03e61a7b4e Mon Sep 17 00:00:00 2001 From: gooseit Date: Tue, 14 Feb 2023 18:12:02 +0300 Subject: [PATCH 03/13] create AutoBase, AutoAbstract classes --- CHANGELOG.md | 2 +- etna/auto/auto.py | 122 ++++++++++++++++++++++++---------------------- 2 files changed, 64 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcece2350..c0ab9af2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ### Changed -- +- Create AutoBase and AutoAbstract classes, some of Auto class's logic moved there ([#1023](https://github.com/tinkoff-ai/etna/pull/1023)) - ### Fixed diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 8f3add4f6..0ee752a59 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -1,3 +1,5 @@ +from abc import ABC +from abc import abstractmethod from typing import Callable from typing import List from typing import Optional @@ -26,9 +28,6 @@ from etna.metrics.utils import aggregate_metrics_df from etna.pipeline import Pipeline -from abc import ABC -from abc import abstractmethod - class _Callback(Protocol): def __call__(self, metrics_df: pd.DataFrame, forecast_df: pd.DataFrame, fold_info_df: pd.DataFrame) -> None: @@ -42,54 +41,6 @@ def __call__(self, pipeline: Pipeline) -> None: class AutoBase: """Base Class for Auto and Tune, implementing core logic behind these classes""" - - def fit( - self, - ts: TSDataset, - timeout: Optional[int] = None, - n_trials: Optional[int] = None, - initializer: Optional[_Initializer] = None, - callback: Optional[_Callback] = None, - **optuna_kwargs, - ) -> Pipeline: - """ - Start automatic pipeline selection. - - Parameters - ---------- - ts: - tsdataset to fit on - timeout: - timeout for optuna. N.B. this is timeout for each worker - n_trials: - number of trials for optuna. N.B. this is number of trials for each worker - initializer: - is called before each pipeline backtest, can be used to initialize loggers - callback: - is called after each pipeline backtest, can be used to log extra metrics - optuna_kwargs: - additional kwargs for optuna :py:meth:`optuna.study.Study.optimize` - """ - if self._optuna is None: - self._optuna = self._init_optuna() - - self._optuna.tune( - objective=self.objective( - ts=ts, - target_metric=self.target_metric, - metric_aggregation=self.metric_aggregation, - metrics=self.metrics, - backtest_params=self.backtest_params, - initializer=initializer, - callback=callback, - ), - runner=self.runner, - n_trials=n_trials, - timeout=timeout, - **optuna_kwargs, - ) - - return get_from_params(**self._optuna.study.best_trial.user_attrs["pipeline"]) def summary(self) -> pd.DataFrame: """Get Auto trials summary.""" @@ -124,7 +75,7 @@ def top_k(self, k: int = 5) -> List[Pipeline]: class AutoAbstract(ABC): """Interface for Auto object""" - + @abstractmethod def fit( self, @@ -154,17 +105,17 @@ def fit( additional kwargs for optuna :py:meth:`optuna.study.Study.optimize` """ pass - + @abstractmethod def _init_optuna(self): """Initialize optuna.""" - + @abstractmethod def summary(self) -> pd.DataFrame: """Get Auto trials summary.""" pass - - @abstractmethod + + @abstractmethod def top_k(self, k: int = 5) -> List[Pipeline]: """ Get top k pipelines. @@ -175,9 +126,9 @@ def top_k(self, k: int = 5) -> List[Pipeline]: number of pipelines to return """ pass - - @abstractmethod + @staticmethod + @abstractmethod def objective( ts: TSDataset, target_metric: Metric, @@ -315,6 +266,60 @@ def fit( return get_from_params(**self._optuna.study.best_trial.user_attrs["pipeline"]) + @staticmethod + def objective( + ts: TSDataset, + target_metric: Metric, + metric_aggregation: MetricAggregationStatistics, + metrics: List[Metric], + backtest_params: dict, + initializer: Optional[_Initializer] = None, + callback: Optional[_Callback] = None, + ) -> Callable[[Trial], float]: + """ + Optuna objective wrapper. + Parameters + ---------- + ts: + tsdataset to fit on + target_metric: + metric to optimize + metric_aggregation: + aggregation method for per-segment metrics + metrics: + list of metrics to compute + backtest_params: + custom parameters for backtest instead of default backtest parameters + initializer: + is called before each pipeline backtest, can be used to initialize loggers + callback: + is called after each pipeline backtest, can be used to log extra metrics + """ + + def _objective(trial: Trial) -> float: + + pipeline_config = dict() + pipeline_config.update(trial.relative_params) + pipeline_config.update(trial.params) + + pipeline: Pipeline = get_from_params(**pipeline_config) + if initializer is not None: + initializer(pipeline=pipeline) + + metrics_df, forecast_df, fold_info_df = pipeline.backtest(ts, metrics=metrics, **backtest_params) + + if callback is not None: + callback(metrics_df=metrics_df, forecast_df=forecast_df, fold_info_df=fold_info_df) + + aggregated_metrics = aggregate_metrics_df(metrics_df) + + for metric in aggregated_metrics: + trial.set_user_attr(metric, aggregated_metrics[metric]) + + return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"] + + return _objective + def _init_optuna(self): """Initialize optuna.""" if isinstance(self.pool, Pool): @@ -331,4 +336,3 @@ def _init_optuna(self): sampler=ConfigSampler(configs=pool_), ) return optuna - From 27adcc9a127571aa9d98009f0b2003860d4a9349 Mon Sep 17 00:00:00 2001 From: gooseit Date: Tue, 14 Feb 2023 18:16:56 +0300 Subject: [PATCH 04/13] lint --- etna/auto/auto.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 0ee752a59..cd9559b55 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -40,7 +40,7 @@ def __call__(self, pipeline: Pipeline) -> None: class AutoBase: - """Base Class for Auto and Tune, implementing core logic behind these classes""" + """Base Class for Auto and Tune, implementing core logic behind these classes.""" def summary(self) -> pd.DataFrame: """Get Auto trials summary.""" @@ -74,7 +74,7 @@ def top_k(self, k: int = 5) -> List[Pipeline]: class AutoAbstract(ABC): - """Interface for Auto object""" + """Interface for Auto object.""" @abstractmethod def fit( From 64fb4b4876b40fcf8bab15fb6385cf626e65da04 Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 15 Feb 2023 16:12:15 +0300 Subject: [PATCH 05/13] lint_2 --- etna/auto/auto.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index cd9559b55..cf1be243a 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -72,6 +72,23 @@ def top_k(self, k: int = 5) -> List[Pipeline]: ) return [pipeline for pipeline in df["pipeline"].values[:k]] # noqa: C416 + def _init_optuna(self): + """Initialize optuna.""" + if isinstance(self.pool, Pool): + pool: List[Pipeline] = self.pool.value.generate(horizon=self.horizon) + else: + pool = self.pool + + pool_ = [pipeline.to_dict() for pipeline in pool] + + optuna = Optuna( + direction="maximize" if self.target_metric.greater_is_better else "minimize", + study_name=self.experiment_folder, + storage=self.storage, + sampler=ConfigSampler(configs=pool_), + ) + return optuna + class AutoAbstract(ABC): """Interface for Auto object.""" @@ -319,20 +336,3 @@ def _objective(trial: Trial) -> float: return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"] return _objective - - def _init_optuna(self): - """Initialize optuna.""" - if isinstance(self.pool, Pool): - pool: List[Pipeline] = self.pool.value.generate(horizon=self.horizon) - else: - pool = self.pool - - pool_ = [pipeline.to_dict() for pipeline in pool] - - optuna = Optuna( - direction="maximize" if self.target_metric.greater_is_better else "minimize", - study_name=self.experiment_folder, - storage=self.storage, - sampler=ConfigSampler(configs=pool_), - ) - return optuna From b3cafc7cfe7e7998b2d550e3d14ebca8dfdf8925 Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 15 Feb 2023 16:32:46 +0300 Subject: [PATCH 06/13] lint_3 --- etna/auto/auto.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index cf1be243a..6af2c6680 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -41,7 +41,17 @@ def __call__(self, pipeline: Pipeline) -> None: class AutoBase: """Base Class for Auto and Tune, implementing core logic behind these classes.""" - + + def __init__( + self, + target_metric: Metric, + metric_aggregation: MetricAggregationStatistics = "mean" + ): + # this code is never executed, its single purpose is to help linter (and user reading code) infer parameters types + self.target_metric: Metric = target_metric + self.metric_aggregation: MetricAggregationStatistics = metric_aggregation + self._optuna: Optional[Optuna] = None + def summary(self) -> pd.DataFrame: """Get Auto trials summary.""" if self._optuna is None: From bd6a0ccf3094c95aa137332630a2f6ae13087ba4 Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 15 Feb 2023 16:57:58 +0300 Subject: [PATCH 07/13] lint_4 --- etna/auto/auto.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 6af2c6680..503ea2f8d 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -41,17 +41,13 @@ def __call__(self, pipeline: Pipeline) -> None: class AutoBase: """Base Class for Auto and Tune, implementing core logic behind these classes.""" - - def __init__( - self, - target_metric: Metric, - metric_aggregation: MetricAggregationStatistics = "mean" - ): + + def __init__(self, target_metric: Metric, metric_aggregation: MetricAggregationStatistics = "mean"): # this code is never executed, its single purpose is to help linter (and user reading code) infer parameters types self.target_metric: Metric = target_metric self.metric_aggregation: MetricAggregationStatistics = metric_aggregation self._optuna: Optional[Optuna] = None - + def summary(self) -> pd.DataFrame: """Get Auto trials summary.""" if self._optuna is None: From 1e5aa346c956e5bac9a2589ff2b5ec64976c726f Mon Sep 17 00:00:00 2001 From: gooseit Date: Mon, 20 Feb 2023 17:35:03 +0300 Subject: [PATCH 08/13] fix inheritance structure --- etna/auto/auto.py | 124 ++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 59 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 503ea2f8d..e27c04063 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -39,65 +39,8 @@ def __call__(self, pipeline: Pipeline) -> None: ... -class AutoBase: - """Base Class for Auto and Tune, implementing core logic behind these classes.""" - - def __init__(self, target_metric: Metric, metric_aggregation: MetricAggregationStatistics = "mean"): - # this code is never executed, its single purpose is to help linter (and user reading code) infer parameters types - self.target_metric: Metric = target_metric - self.metric_aggregation: MetricAggregationStatistics = metric_aggregation - self._optuna: Optional[Optuna] = None - - def summary(self) -> pd.DataFrame: - """Get Auto trials summary.""" - if self._optuna is None: - self._optuna = self._init_optuna() - - study = self._optuna.study.get_trials() - - study_params = [ - {**trial.user_attrs, "pipeline": get_from_params(**trial.user_attrs["pipeline"]), "state": trial.state} - for trial in study - ] - - return pd.DataFrame(study_params) - - def top_k(self, k: int = 5) -> List[Pipeline]: - """ - Get top k pipelines. - - Parameters - ---------- - k: - number of pipelines to return - """ - summary = self.summary() - df = summary.sort_values( - by=[f"{self.target_metric.name}_{self.metric_aggregation}"], - ascending=(not self.target_metric.greater_is_better), - ) - return [pipeline for pipeline in df["pipeline"].values[:k]] # noqa: C416 - - def _init_optuna(self): - """Initialize optuna.""" - if isinstance(self.pool, Pool): - pool: List[Pipeline] = self.pool.value.generate(horizon=self.horizon) - else: - pool = self.pool - - pool_ = [pipeline.to_dict() for pipeline in pool] - - optuna = Optuna( - direction="maximize" if self.target_metric.greater_is_better else "minimize", - study_name=self.experiment_folder, - storage=self.storage, - sampler=ConfigSampler(configs=pool_), - ) - return optuna - - class AutoAbstract(ABC): - """Interface for Auto object.""" + """Interface for ``Auto`` object.""" @abstractmethod def fit( @@ -184,7 +127,53 @@ def objective( pass -class Auto(AutoBase, AutoAbstract): +class AutoBase(AutoAbstract): + """Base Class for ``Auto`` and ``Tune``, implementing core logic behind these classes.""" + + def __init__(self, target_metric: Metric, metric_aggregation: MetricAggregationStatistics = "mean"): + # this code is never executed, its single purpose is to help linter (and the user reading code) infer parameters types + self.target_metric: Metric = target_metric + self.metric_aggregation: MetricAggregationStatistics = metric_aggregation + self._optuna: Optional[Optuna] = None + + def summary(self) -> pd.DataFrame: + """Get Auto trials summary. + + Returns + ------- + study_dataframe: + dataframe with detailed info on each performed trial + """ + if self._optuna is None: + self._optuna = self._init_optuna() + + study = self._optuna.study.get_trials() + + study_params = [ + {**trial.user_attrs, "pipeline": get_from_params(**trial.user_attrs["pipeline"]), "state": trial.state} + for trial in study + ] + + return pd.DataFrame(study_params) + + def top_k(self, k: int = 5) -> List[Pipeline]: + """ + Get top k pipelines. + + Parameters + ---------- + k: + number of pipelines to return + """ + summary = self.summary() + df = summary.sort_values( + by=[f"{self.target_metric.name}_{self.metric_aggregation}"], + ascending=(not self.target_metric.greater_is_better), + ) + return [pipeline for pipeline in df["pipeline"].values[:k]] # noqa: C416 + + +class Auto(AutoBase): """Automatic pipeline selection via defined or custom pipeline pool.""" def __init__( @@ -342,3 +331,20 @@ def _objective(trial: Trial) -> float: return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"] return _objective + + def _init_optuna(self): + """Initialize optuna.""" + if isinstance(self.pool, Pool): + pool: List[Pipeline] = self.pool.value.generate(horizon=self.horizon) + else: + pool = self.pool + + pool_ = [pipeline.to_dict() for pipeline in pool] + + optuna = Optuna( + direction="maximize" if self.target_metric.greater_is_better else "minimize", + study_name=self.experiment_folder, + storage=self.storage, + sampler=ConfigSampler(configs=pool_), + ) + return optuna From 35900a0bb5a0624bf148fbeadfb019e9342a7730 Mon Sep 17 00:00:00 2001 From: gooseit Date: Tue, 21 Feb 2023 11:48:45 +0300 Subject: [PATCH 09/13] init & changelog update --- CHANGELOG.md | 2 +- etna/auto/auto.py | 72 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 013578678..0cd288d5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ### Changed -- Create AutoBase and AutoAbstract classes, some of Auto class's logic moved there ([#1023](https://github.com/tinkoff-ai/etna/pull/1023)) +- Create `AutoBase` and `AutoAbstract` classes, some of `Auto` class's logic moved there ([#1114](https://github.com/tinkoff-ai/etna/pull/1114)) - Impose specific order of columns on return value of TSDataset.to_flatten ([#1095](https://github.com/tinkoff-ai/etna/pull/1095)) ### Fixed diff --git a/etna/auto/auto.py b/etna/auto/auto.py index e27c04063..747ca602e 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -130,10 +130,54 @@ def objective( class AutoBase(AutoAbstract): """Base Class for ``Auto`` and ``Tune``, implementing core logic behind these classes.""" - def __init__(self, target_metric: Metric, metric_aggregation: MetricAggregationStatistics = "mean"): - # this code is never executed, its single purpose is to help linter (and the user reading code) infer parameters types - self.target_metric: Metric = target_metric + def __init__( + self, + target_metric: Metric, + horizon: int, + metric_aggregation: MetricAggregationStatistics = "mean", + backtest_params: Optional[dict] = None, + experiment_folder: Optional[str] = None, + runner: Optional[AbstractRunner] = None, + storage: Optional[BaseStorage] = None, + metrics: Optional[List[Metric]] = None, + ): + """ + Initialize AutoBase class. + + Parameters + ---------- + target_metric: + metric to optimize + horizon: + horizon to forecast for + metric_aggregation: + aggregation method for per-segment metrics + backtest_params: + custom parameters for backtest instead of default backtest parameters + experiment_folder: + folder to store experiment results and name for optuna study + runner: + runner to use for distributed training + storage: + optuna storage to use + metrics: + list of metrics to compute + """ + if target_metric.greater_is_better is None: + raise ValueError("target_metric.greater_is_better is None") + self.target_metric = target_metric + self.horizon = horizon self.metric_aggregation: MetricAggregationStatistics = metric_aggregation + self.backtest_params = {} if backtest_params is None else backtest_params + self.experiment_folder = experiment_folder + + self.runner = LocalRunner() if runner is None else runner + self.storage = RDBStorage("sqlite:///etna-auto.db") if storage is None else storage + + metrics = [Sign(), SMAPE(), MAE(), MSE(), MedAE()] if metrics is None else metrics + if str(target_metric) not in [str(metric) for metric in metrics]: + metrics.append(target_metric) + self.metrics = metrics self._optuna: Optional[Optuna] = None def summary(self) -> pd.DataFrame: @@ -212,23 +256,10 @@ def __init__( metrics: list of metrics to compute """ - if target_metric.greater_is_better is None: - raise ValueError("target_metric.greater_is_better is None") - self.target_metric = target_metric - - self.metric_aggregation = metric_aggregation - self.backtest_params = {} if backtest_params is None else backtest_params - self.horizon = horizon - self.experiment_folder = experiment_folder - self.pool = pool - self.runner = LocalRunner() if runner is None else runner - self.storage = RDBStorage("sqlite:///etna-auto.db") if storage is None else storage - - metrics = [Sign(), SMAPE(), MAE(), MSE(), MedAE()] if metrics is None else metrics - if str(target_metric) not in [str(metric) for metric in metrics]: - metrics.append(target_metric) - self.metrics = metrics - self._optuna: Optional[Optuna] = None + super().__init__( + target_metric, horizon, metric_aggregation, backtest_params, experiment_folder, runner, storage, metrics + ) + self.pool = poolS def fit( self, @@ -290,6 +321,7 @@ def objective( ) -> Callable[[Trial], float]: """ Optuna objective wrapper. + Parameters ---------- ts: From f738796d5581a91e64ca8b8a80b4bd230cdbfbff Mon Sep 17 00:00:00 2001 From: gooseit Date: Tue, 21 Feb 2023 11:55:16 +0300 Subject: [PATCH 10/13] fixed typo --- etna/auto/auto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 747ca602e..f8e56ca49 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -259,7 +259,7 @@ def __init__( super().__init__( target_metric, horizon, metric_aggregation, backtest_params, experiment_folder, runner, storage, metrics ) - self.pool = poolS + self.pool = pool def fit( self, From 5b30a7b31f57f37b8ea395218203ab87cf4e9dbe Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 22 Feb 2023 16:43:06 +0300 Subject: [PATCH 11/13] not_to_discard_changes --- etna/auto/auto.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index f8e56ca49..4a4f75fa7 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -257,7 +257,14 @@ def __init__( list of metrics to compute """ super().__init__( - target_metric, horizon, metric_aggregation, backtest_params, experiment_folder, runner, storage, metrics + target_metric=target_matric, + horizon=horizon, + metric_aggregation=metric_aggregation, + backtest_param=backtest_params, + experiment_folder=experiment_folder, + runner=runner, + storage=storage, + metrics=metrics ) self.pool = pool From c73cec24b2e3de945b87f2a27df7af5e40f2033b Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 22 Feb 2023 17:08:06 +0300 Subject: [PATCH 12/13] not_to_discard_changes --- etna/auto/auto.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 4a4f75fa7..334bedf3a 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -257,10 +257,10 @@ def __init__( list of metrics to compute """ super().__init__( - target_metric=target_matric, + target_metric=target_metric, horizon=horizon, metric_aggregation=metric_aggregation, - backtest_param=backtest_params, + backtest_params=backtest_params, experiment_folder=experiment_folder, runner=runner, storage=storage, From 37f2106f2d6cb0151a7a2c2f611d33b76824060c Mon Sep 17 00:00:00 2001 From: gooseit Date: Wed, 22 Feb 2023 20:02:43 +0300 Subject: [PATCH 13/13] mock tests changed to work with AutoBase, auto.py code refactored --- etna/auto/auto.py | 2 +- tests/test_auto/test_auto.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/etna/auto/auto.py b/etna/auto/auto.py index 334bedf3a..c7286fc3b 100644 --- a/etna/auto/auto.py +++ b/etna/auto/auto.py @@ -264,7 +264,7 @@ def __init__( experiment_folder=experiment_folder, runner=runner, storage=storage, - metrics=metrics + metrics=metrics, ) self.pool = pool diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py index 6d56b5bf8..4061aec55 100644 --- a/tests/test_auto/test_auto.py +++ b/tests/test_auto/test_auto.py @@ -8,6 +8,7 @@ from typing_extensions import NamedTuple from etna.auto import Auto +from etna.auto.auto import AutoBase from etna.auto.auto import _Callback from etna.auto.auto import _Initializer from etna.metrics import MAE @@ -124,7 +125,7 @@ def test_summary( auto=MagicMock(), ): auto._optuna.study.get_trials.return_value = trials - df_summary = Auto.summary(self=auto) + df_summary = AutoBase.summary(self=auto) assert len(df_summary) == len(trials) assert list(df_summary["SMAPE_median"].values) == [trial.user_attrs["SMAPE_median"] for trial in trials] @@ -140,8 +141,8 @@ def test_top_k( auto.metric_aggregation = "median" auto.target_metric.greater_is_better = False - df_summary = Auto.summary(self=auto) + df_summary = AutoBase.summary(self=auto) auto.summary = MagicMock(return_value=df_summary) - top_k = Auto.top_k(auto, k=k) + top_k = AutoBase.top_k(auto, k=k) assert len(top_k) == k assert [pipeline.model.lag for pipeline in top_k] == [i for i in range(k)] # noqa C416