From 66c9263e15dcb51ff6ee773ff67ebc2c2ad0913d Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Thu, 27 Apr 2023 12:38:34 +0300 Subject: [PATCH 1/7] feature: add default params_to_tune for feature selection transforms --- .../feature_selection/feature_importance.py | 67 +++++++++++++++++-- .../feature_selection/gale_shapley.py | 23 +++++++ .../test_feature_importance_transform.py | 32 +++++++++ .../test_gale_shapley_transform.py | 16 +++++ 4 files changed, 132 insertions(+), 6 deletions(-) diff --git a/etna/transforms/feature_selection/feature_importance.py b/etna/transforms/feature_selection/feature_importance.py index a1470a613..c59aaacfb 100644 --- a/etna/transforms/feature_selection/feature_importance.py +++ b/etna/transforms/feature_selection/feature_importance.py @@ -14,12 +14,19 @@ from sklearn.tree import ExtraTreeRegressor from typing_extensions import Literal +from etna import SETTINGS from etna.analysis import RelevanceTable from etna.analysis.feature_selection.mrmr_selection import AggregationMode from etna.analysis.feature_selection.mrmr_selection import mrmr from etna.datasets import TSDataset from etna.transforms.feature_selection import BaseFeatureSelectionTransform +if SETTINGS.auto_required: + from optuna.distributions import BaseDistribution + from optuna.distributions import CategoricalDistribution + from optuna.distributions import IntUniformDistribution + + TreeBasedRegressor = Union[ DecisionTreeRegressor, ExtraTreeRegressor, @@ -41,7 +48,7 @@ class TreeFeatureSelectionTransform(BaseFeatureSelectionTransform): def __init__( self, - model: TreeBasedRegressor, + model: Union[Literal["catboost"], Literal["random_forest"], TreeBasedRegressor], top_k: int, features_to_use: Union[List[str], Literal["all"]] = "all", return_features: bool = False, @@ -52,8 +59,14 @@ def __init__( Parameters ---------- model: - model to make selection, it should have ``feature_importances_`` property - (e.g. all tree-based regressors in sklearn) + Model to make selection, it should have ``feature_importances_`` property + (e.g. all tree-based regressors in sklearn). 
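For illustration, the two construction styles below should be interchangeable once this change lands (a minimal sketch, assuming only the public import used by the tests in this patch; the top_k value is arbitrary):

    from catboost import CatBoostRegressor

    from etna.transforms.feature_selection import TreeFeatureSelectionTransform

    # Pre-defined option, resolved inside __init__ to CatBoostRegressor(silent=True).
    selector = TreeFeatureSelectionTransform(model="catboost", top_k=5)

    # Equivalent explicit construction with a custom estimator.
    selector = TreeFeatureSelectionTransform(model=CatBoostRegressor(silent=True), top_k=5)

    # Any other string is rejected at construction time:
    # TreeFeatureSelectionTransform(model="unknown", top_k=5)  # raises ValueError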
+ Pre-defined options are also available: + + * catboost: ``catboost.CatBoostRegressor(silent=True)``, this model won't work if there are any category types + + * random_forest: ``sklearn.ensemble.RandomForestRegressor(random_state=0)`` + top_k: num of features to select; if there are not enough features, then all will be selected features_to_use: @@ -64,8 +77,16 @@ def __init__( if not isinstance(top_k, int) or top_k < 0: raise ValueError("Parameter top_k should be positive integer") super().__init__(features_to_use=features_to_use, return_features=return_features) - self.model = model self.top_k = top_k + if isinstance(model, str): + if model == "catboost": + self.model = CatBoostRegressor(silent=True) + elif model == "random_forest": + self.model = RandomForestRegressor(random_state=0) + else: + raise ValueError(f"Not a valid option for model: {model}") + else: + self.model = model def _get_train(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: """Get train data for model.""" @@ -102,7 +123,7 @@ def _fit(self, df: pd.DataFrame) -> "TreeFeatureSelectionTransform": Returns ------- - result: TreeFeatureSelectionTransform + result: instance after fitting """ if len(self._get_features_to_use(df)) == 0: @@ -112,6 +133,24 @@ def _fit(self, df: pd.DataFrame) -> "TreeFeatureSelectionTransform": self.selected_features = self._select_top_k_features(weights, self.top_k) return self + def params_to_tune(self) -> Dict[str, "BaseDistribution"]: + """Get default grid for tuning hyperparameters. + + This grid tunes parameters: ``model``, ``top_k``. Other parameters are expected to be set by the user. + + For ``model`` parameter only pre-defined options are suggested. + For ``top_k`` parameter the maximum suggested value is not greater than ``self.top_k``. + + Returns + ------- + : + Grid to tune. + """ + return { + "model": CategoricalDistribution(["catboost", "random_forest"]), + "top_k": IntUniformDistribution(low=1, high=self.top_k), + } + class MRMRFeatureSelectionTransform(BaseFeatureSelectionTransform): """Transform that selects features according to MRMR variable selection method adapted to the timeseries case. @@ -176,7 +215,7 @@ def _fit(self, df: pd.DataFrame) -> "MRMRFeatureSelectionTransform": Returns ------- - result: MRMRFeatureSelectionTransform + result: instance after fitting """ features = self._get_features_to_use(df) @@ -193,3 +232,19 @@ def _fit(self, df: pd.DataFrame) -> "MRMRFeatureSelectionTransform": atol=self.atol, ) return self + + def params_to_tune(self) -> Dict[str, "BaseDistribution"]: + """Get default grid for tuning hyperparameters. + + This grid tunes only ``top_k`` parameter. Other parameters are expected to be set by the user. + + For ``top_k`` parameter the maximum suggested value is not greater than ``self.top_k``. + + Returns + ------- + : + Grid to tune. 
+ """ + return { + "top_k": IntUniformDistribution(low=1, high=self.top_k), + } diff --git a/etna/transforms/feature_selection/gale_shapley.py b/etna/transforms/feature_selection/gale_shapley.py index b1e452f05..b57aa72ab 100644 --- a/etna/transforms/feature_selection/gale_shapley.py +++ b/etna/transforms/feature_selection/gale_shapley.py @@ -8,10 +8,16 @@ import pandas as pd from typing_extensions import Literal +from etna import SETTINGS from etna.analysis import RelevanceTable from etna.core import BaseMixin from etna.transforms.feature_selection.base import BaseFeatureSelectionTransform +if SETTINGS.auto_required: + from optuna.distributions import BaseDistribution + from optuna.distributions import CategoricalDistribution + from optuna.distributions import IntUniformDistribution + class BaseGaleShapley(BaseMixin): """Base class for a member of Gale-Shapley matching.""" @@ -385,3 +391,20 @@ def _fit(self, df: pd.DataFrame) -> "GaleShapleyFeatureSelectionTransform": segment_features_ranking=segment_features_ranking, features_to_drop=selected_features ) return self + + def params_to_tune(self) -> Dict[str, "BaseDistribution"]: + """Get default grid for tuning hyperparameters. + + This grid tunes parameters: ``top_k``, ``use_rank``. Other parameters are expected to be set by the user. + + For ``top_k`` parameter the maximum suggested value is not greater than ``self.top_k``. + + Returns + ------- + : + Grid to tune. + """ + return { + "top_k": IntUniformDistribution(low=1, high=self.top_k), + "use_rank": CategoricalDistribution([False, True]), + } diff --git a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py index 5ab511e13..9d1f65aec 100644 --- a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py +++ b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py @@ -18,6 +18,7 @@ from etna.transforms import SegmentEncoderTransform from etna.transforms.feature_selection import TreeFeatureSelectionTransform from etna.transforms.feature_selection.feature_importance import MRMRFeatureSelectionTransform +from tests.test_transforms.utils import assert_sampling_is_valid from tests.test_transforms.utils import assert_transformation_equals_loaded_original @@ -67,9 +68,16 @@ def ts_with_regressors(): ) +def test_create_with_unknown_model(ts_with_exog): + with pytest.raises(ValueError, match="Not a valid option for model: .*"): + _ = TreeFeatureSelectionTransform(model="unknown", top_k=3, features_to_use="all") + + @pytest.mark.parametrize( "model", [ + "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -86,6 +94,7 @@ def test_work_with_non_regressors(ts_with_exog, model): @pytest.mark.parametrize( "model", [ + "random_forest", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -113,6 +122,7 @@ def test_selected_top_k_regressors(model, top_k, ts_with_regressors): @pytest.mark.parametrize( "model", [ + "random_forest", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -141,6 +151,7 @@ def test_retain_values(model, top_k, ts_with_regressors): @pytest.mark.parametrize( "model", [ + "random_forest", DecisionTreeRegressor(random_state=42), 
ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -158,6 +169,8 @@ def test_fails_negative_top_k(model): @pytest.mark.parametrize( "model", [ + "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -178,6 +191,7 @@ def test_warns_no_regressors(model, example_tsds): @pytest.mark.parametrize( "model", [ + "random_forest", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -202,6 +216,7 @@ def test_sanity_selected(model, ts_with_regressors): @pytest.mark.parametrize( "model", [ + "random_forest", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -231,6 +246,7 @@ def test_sanity_model(model, ts_with_regressors): @pytest.mark.parametrize( "model", [ + "random_forest", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -286,3 +302,19 @@ def test_mrmr_right_regressors(relevance_table, ts_with_regressors): ) def test_save_load(transform, ts_with_regressors): assert_transformation_equals_loaded_original(transform=transform, ts=ts_with_regressors) + + +@pytest.mark.parametrize( + "transform", + [ + TreeFeatureSelectionTransform(model=DecisionTreeRegressor(random_state=42), top_k=3), + MRMRFeatureSelectionTransform( + relevance_table=ModelRelevanceTable(), top_k=3, model=RandomForestRegressor(random_state=42) + ), + MRMRFeatureSelectionTransform(relevance_table=StatisticsRelevanceTable(), top_k=3), + ], +) +def test_params_to_tune(transform, ts_with_regressors): + ts = ts_with_regressors + assert len(transform.params_to_tune()) > 0 + assert_sampling_is_valid(transform=transform, ts=ts) diff --git a/tests/test_transforms/test_feature_selection/test_gale_shapley_transform.py b/tests/test_transforms/test_feature_selection/test_gale_shapley_transform.py index 37cc00ff8..87322e356 100644 --- a/tests/test_transforms/test_feature_selection/test_gale_shapley_transform.py +++ b/tests/test_transforms/test_feature_selection/test_gale_shapley_transform.py @@ -16,6 +16,7 @@ from etna.transforms.feature_selection.gale_shapley import FeatureGaleShapley from etna.transforms.feature_selection.gale_shapley import GaleShapleyMatcher from etna.transforms.feature_selection.gale_shapley import SegmentGaleShapley +from tests.test_transforms.utils import assert_sampling_is_valid from tests.test_transforms.utils import assert_transformation_equals_loaded_original @@ -659,3 +660,18 @@ def test_right_number_features_with_integer_division(ts_with_exog_galeshapley): remaining_columns = ts.columns.get_level_values("feature").unique().tolist() assert len(remaining_columns) == top_k + 1 + + +@pytest.mark.parametrize( + "transform", + [ + GaleShapleyFeatureSelectionTransform( + relevance_table=ModelRelevanceTable(), top_k=3, use_rank=False, model=RandomForestRegressor(random_state=42) + ), + GaleShapleyFeatureSelectionTransform(relevance_table=StatisticsRelevanceTable(), top_k=3, use_rank=False), + ], +) +def test_params_to_tune(transform, ts_with_large_regressors_number): + ts = ts_with_large_regressors_number + assert len(transform.params_to_tune()) > 0 + assert_sampling_is_valid(transform=transform, ts=ts) From e5c27c863f25810ea2ee082950d1130571c2b89e Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Thu, 27 Apr 2023 
12:40:51 +0300
Subject: [PATCH 2/7] chore: update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a2539f434..21f1080a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -56,6 +56,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add default `params_to_tune` for `TimeSeriesImputerTransform` ([#1232](https://github.com/tinkoff-ai/etna/pull/1232))
 - Add default `params_to_tune` for `DifferencingTransform`, `MedianTransform`, `MaxTransform`, `MinTransform`, `QuantileTransform`, `StdTransform`, `MeanTransform`, `MADTransform`, `MinMaxDifferenceTransform`, `SumTransform`, `BoxCoxTransform`, `YeoJohnsonTransform`, `MaxAbsScalerTransform`, `MinMaxScalerTransform`, `RobustScalerTransform` and `StandardScalerTransform` ([#1233](https://github.com/tinkoff-ai/etna/pull/1233))
 - Add default `params_to_tune` for `LabelEncoderTransform` ([#1242](https://github.com/tinkoff-ai/etna/pull/1242))
+- Add default `params_to_tune` for `TreeFeatureSelectionTransform`, `MRMRFeatureSelectionTransform` and `GaleShapleyFeatureSelectionTransform` ([#1250](https://github.com/tinkoff-ai/etna/pull/1250))
 ### Fixed
 - Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
 - `ProphetModel` fails with additional seasonality set ([#1157](https://github.com/tinkoff-ai/etna/pull/1157))

From a55ab0967293495d53e05e9b75430edae15b39ac Mon Sep 17 00:00:00 2001
From: "d.a.bunin"
Date: Thu, 27 Apr 2023 12:43:36 +0300
Subject: [PATCH 3/7] test: add test on params_to_tune for FilterFeaturesTransform

---
 .../test_feature_selection/test_filter_transform.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_transforms/test_feature_selection/test_filter_transform.py b/tests/test_transforms/test_feature_selection/test_filter_transform.py
index 5f1adcdf6..42ae4e211 100644
--- a/tests/test_transforms/test_feature_selection/test_filter_transform.py
+++ b/tests/test_transforms/test_feature_selection/test_filter_transform.py
@@ -201,3 +201,16 @@ def test_inverse_transform_back_included_columns(ts_with_features, columns, retu
 )
 def test_save_load(transform, ts_with_features):
     assert_transformation_equals_loaded_original(transform=transform, ts=ts_with_features)
+
+
+@pytest.mark.parametrize(
+    "transform",
+    [
+        FilterFeaturesTransform(include=["target"], return_features=True),
+        FilterFeaturesTransform(include=["target"], return_features=False),
+        FilterFeaturesTransform(exclude=["exog_1", "exog_2"], return_features=True),
+        FilterFeaturesTransform(exclude=["exog_1", "exog_2"], return_features=False),
+    ],
+)
+def test_params_to_tune(transform):
+    assert len(transform.params_to_tune()) == 0

From 30b8c6182408e50dc9408733abd7eb3694a2401c Mon Sep 17 00:00:00 2001
From: "d.a.bunin"
Date: Fri, 28 Apr 2023 11:04:23 +0300
Subject: [PATCH 4/7] docs: add number of trees in documentation for TreeFeatureSelectionTransform

---
 etna/transforms/feature_selection/feature_importance.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/etna/transforms/feature_selection/feature_importance.py b/etna/transforms/feature_selection/feature_importance.py
index c59aaacfb..148bb0177 100644
--- a/etna/transforms/feature_selection/feature_importance.py
+++ b/etna/transforms/feature_selection/feature_importance.py
@@ -63,9 +63,10 @@ def __init__(
             (e.g. all tree-based regressors in sklearn).
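Since the grids returned by params_to_tune throughout this series are plain optuna distributions, any tuner can consume them. A minimal sampling sketch, assuming optuna's ask interface and etna's set_params mixin:

    import optuna
    from optuna.distributions import CategoricalDistribution
    from optuna.distributions import IntUniformDistribution

    from etna.transforms.feature_selection import TreeFeatureSelectionTransform

    transform = TreeFeatureSelectionTransform(model="random_forest", top_k=10)
    trial = optuna.create_study().ask()

    params = {}
    for name, dist in transform.params_to_tune().items():
        if isinstance(dist, CategoricalDistribution):
            params[name] = trial.suggest_categorical(name, dist.choices)
        elif isinstance(dist, IntUniformDistribution):
            # The upper bound never exceeds the user-provided top_k.
            params[name] = trial.suggest_int(name, dist.low, dist.high)

    # etna's BaseMixin.set_params is assumed to return an updated instance.
    tuned_transform = transform.set_params(**params)

IntUniformDistribution is the pre-optuna-3 spelling used here; newer optuna deprecates it in favour of suggest_int, which is why the sketch converts each distribution into a suggest_* call.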
Pre-defined options are also available: - * catboost: ``catboost.CatBoostRegressor(silent=True)``, this model won't work if there are any category types + * catboost: ``catboost.CatBoostRegressor(iterations=1000, silent=True)``, + this model won't work if there are any category types - * random_forest: ``sklearn.ensemble.RandomForestRegressor(random_state=0)`` + * random_forest: ``sklearn.ensemble.RandomForestRegressor(n_estimators=100, random_state=0)`` top_k: num of features to select; if there are not enough features, then all will be selected @@ -80,7 +81,7 @@ def __init__( self.top_k = top_k if isinstance(model, str): if model == "catboost": - self.model = CatBoostRegressor(silent=True) + self.model = CatBoostRegressor(iterations=1000, silent=True) elif model == "random_forest": self.model = RandomForestRegressor(random_state=0) else: From 579601cb28b49d5b3a339eaadcd6e5b08e3eee82 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Fri, 28 Apr 2023 12:08:01 +0300 Subject: [PATCH 5/7] fix: fix behavior with category dtype for catboost model in TreeFeatureSelectionTransform --- .../feature_selection/feature_importance.py | 16 ++++++++++++---- .../test_feature_importance_transform.py | 6 ++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/etna/transforms/feature_selection/feature_importance.py b/etna/transforms/feature_selection/feature_importance.py index 148bb0177..da2b83e50 100644 --- a/etna/transforms/feature_selection/feature_importance.py +++ b/etna/transforms/feature_selection/feature_importance.py @@ -61,12 +61,15 @@ def __init__( model: Model to make selection, it should have ``feature_importances_`` property (e.g. all tree-based regressors in sklearn). + + If ``catboost.CatBoostRegressor`` is given with no ``cat_features`` parameter, + then ``cat_features`` are set during ``fit`` to be equal to columns of category type. + Pre-defined options are also available: - * catboost: ``catboost.CatBoostRegressor(iterations=1000, silent=True)``, - this model won't work if there are any category types + * catboost: ``catboost.CatBoostRegressor(iterations=1000, silent=True)``; - * random_forest: ``sklearn.ensemble.RandomForestRegressor(n_estimators=100, random_state=0)`` + * random_forest: ``sklearn.ensemble.RandomForestRegressor(n_estimators=100, random_state=0)``. 
top_k: num of features to select; if there are not enough features, then all will be selected @@ -100,7 +103,12 @@ def _get_train(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: def _get_features_weights(self, df: pd.DataFrame) -> Dict[str, float]: """Get weights for features based on model feature importances.""" train_data, train_target = self._get_train(df) - self.model.fit(train_data, train_target) + if isinstance(self.model, CatBoostRegressor) and self.model.get_param("cat_features") is None: + dtypes = train_data.dtypes + cat_features = dtypes[dtypes == "category"].index.tolist() + self.model.fit(train_data, train_target, cat_features=cat_features) + else: + self.model.fit(train_data, train_target) weights_array = self.model.feature_importances_ weights_dict = {column: weights_array[i] for i, column in enumerate(train_data.columns)} return weights_dict diff --git a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py index 9d1f65aec..337ef1113 100644 --- a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py +++ b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py @@ -95,6 +95,7 @@ def test_work_with_non_regressors(ts_with_exog, model): "model", [ "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -123,6 +124,7 @@ def test_selected_top_k_regressors(model, top_k, ts_with_regressors): "model", [ "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -152,6 +154,7 @@ def test_retain_values(model, top_k, ts_with_regressors): "model", [ "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -192,6 +195,7 @@ def test_warns_no_regressors(model, example_tsds): "model", [ "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -217,6 +221,7 @@ def test_sanity_selected(model, ts_with_regressors): "model", [ "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), @@ -247,6 +252,7 @@ def test_sanity_model(model, ts_with_regressors): "model", [ "random_forest", + "catboost", DecisionTreeRegressor(random_state=42), ExtraTreeRegressor(random_state=42), RandomForestRegressor(n_estimators=10, random_state=42), From 71558ba87ef49862f85818c231ec659d3b2cad62 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Fri, 28 Apr 2023 19:01:51 +0300 Subject: [PATCH 6/7] test: add new fixture, add test on catboost models --- .../test_feature_importance_transform.py | 45 ++++++++++++------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py index 337ef1113..a4e436839 100644 --- a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py +++ b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py @@ -68,11 +68,32 @@ def ts_with_regressors(): ) 
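The dtype-based detection added to _get_features_weights above can be exercised in isolation. A small sketch with a hypothetical frame:

    import pandas as pd

    # Hypothetical training frame: one category column, one numeric column.
    train_data = pd.DataFrame(
        {
            "segment_code": pd.Series(["a", "b", "a"], dtype="category"),
            "exog_1": [1.0, 2.0, 3.0],
        }
    )

    # Same detection as in the transform: pick columns of category dtype.
    dtypes = train_data.dtypes
    cat_features = dtypes[dtypes == "category"].index.tolist()
    assert cat_features == ["segment_code"]

This is what lets a bare CatBoostRegressor fit on data containing category columns without an explicit cat_features argument.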
+@pytest.fixture +def ts_with_regressors_and_features(ts_with_regressors): + le_encoder = SegmentEncoderTransform() + le_encoder.fit_transform(ts_with_regressors) + return ts_with_regressors + + def test_create_with_unknown_model(ts_with_exog): with pytest.raises(ValueError, match="Not a valid option for model: .*"): _ = TreeFeatureSelectionTransform(model="unknown", top_k=3, features_to_use="all") +@pytest.mark.parametrize( + "model", + [ + "catboost", + CatBoostRegressor(iterations=10, random_state=42, silent=True), + CatBoostRegressor(iterations=10, random_state=42, silent=True, cat_features=["segment_code"]), + ], +) +def test_catboost_with_cat_features(model, ts_with_regressors_and_features): + """Check that transform with catboost model can work with cat features in a dataset.""" + selector = TreeFeatureSelectionTransform(model=model, top_k=3, features_to_use="all") + selector.fit_transform(ts_with_regressors_and_features) + + @pytest.mark.parametrize( "model", [ @@ -105,14 +126,10 @@ def test_work_with_non_regressors(ts_with_exog, model): ], ) @pytest.mark.parametrize("top_k", [0, 1, 5, 15, 50]) -def test_selected_top_k_regressors(model, top_k, ts_with_regressors): +def test_selected_top_k_regressors(model, top_k, ts_with_regressors_and_features): """Check that transform selects exactly top_k regressors if where are this much.""" - all_regressors = ts_with_regressors.regressors - all_regressors.append("segment_code") - - ts = ts_with_regressors - le_encoder = SegmentEncoderTransform() - le_encoder.fit_transform(ts) + ts = ts_with_regressors_and_features + all_regressors = ts_with_regressors_and_features.regressors selector = TreeFeatureSelectionTransform(model=model, top_k=top_k) selector.fit_transform(ts) @@ -134,16 +151,14 @@ def test_selected_top_k_regressors(model, top_k, ts_with_regressors): ], ) @pytest.mark.parametrize("top_k", [0, 1, 5, 15, 50]) -def test_retain_values(model, top_k, ts_with_regressors): +def test_retain_values(model, top_k, ts_with_regressors_and_features): """Check that transform doesn't change values of columns.""" - ts = ts_with_regressors - le_encoder = SegmentEncoderTransform() - le_encoder.fit_transform(ts) + ts = ts_with_regressors_and_features df_encoded = ts.to_pandas() selector = TreeFeatureSelectionTransform(model=model, top_k=top_k) df_selected = selector.fit_transform(ts).to_pandas() - for segment in ts_with_regressors.segments: + for segment in ts.segments: for column in df_selected.columns.get_level_values("feature").unique(): assert ( df_selected.loc[:, pd.IndexSlice[segment, column]] == df_encoded.loc[:, pd.IndexSlice[segment, column]] @@ -204,11 +219,9 @@ def test_warns_no_regressors(model, example_tsds): CatBoostRegressor(iterations=700, random_state=42, silent=True, cat_features=["segment_code"]), ], ) -def test_sanity_selected(model, ts_with_regressors): +def test_sanity_selected(model, ts_with_regressors_and_features): """Check that transform correctly finds meaningful regressors.""" - ts = ts_with_regressors - le_encoder = SegmentEncoderTransform() - le_encoder.fit_transform(ts) + ts = ts_with_regressors_and_features selector = TreeFeatureSelectionTransform(model=model, top_k=8) df_selected = selector.fit_transform(ts).to_pandas() features_columns = df_selected.columns.get_level_values("feature").unique() From 48638b74fc8bf5ffcd066227cc5585ced2735596 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Fri, 28 Apr 2023 19:11:39 +0300 Subject: [PATCH 7/7] test: simplify tests --- .../test_feature_importance_transform.py | 22 
+++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py index a4e436839..56fe0ba84 100644 --- a/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py +++ b/tests/test_transforms/test_feature_selection/test_feature_importance_transform.py @@ -122,14 +122,14 @@ def test_work_with_non_regressors(ts_with_exog, model): RandomForestRegressor(n_estimators=10, random_state=42), ExtraTreesRegressor(n_estimators=10, random_state=42), GradientBoostingRegressor(n_estimators=10, random_state=42), - CatBoostRegressor(iterations=10, random_state=42, silent=True, cat_features=["segment_code"]), + CatBoostRegressor(iterations=10, random_state=42, silent=True), ], ) @pytest.mark.parametrize("top_k", [0, 1, 5, 15, 50]) -def test_selected_top_k_regressors(model, top_k, ts_with_regressors_and_features): +def test_selected_top_k_regressors(model, top_k, ts_with_regressors): """Check that transform selects exactly top_k regressors if where are this much.""" - ts = ts_with_regressors_and_features - all_regressors = ts_with_regressors_and_features.regressors + ts = ts_with_regressors + all_regressors = ts_with_regressors.regressors selector = TreeFeatureSelectionTransform(model=model, top_k=top_k) selector.fit_transform(ts) @@ -147,13 +147,13 @@ def test_selected_top_k_regressors(model, top_k, ts_with_regressors_and_features RandomForestRegressor(n_estimators=10, random_state=42), ExtraTreesRegressor(n_estimators=10, random_state=42), GradientBoostingRegressor(n_estimators=10, random_state=42), - CatBoostRegressor(iterations=10, random_state=42, silent=True, cat_features=["segment_code"]), + CatBoostRegressor(iterations=10, random_state=42, silent=True), ], ) @pytest.mark.parametrize("top_k", [0, 1, 5, 15, 50]) -def test_retain_values(model, top_k, ts_with_regressors_and_features): +def test_retain_values(model, top_k, ts_with_regressors): """Check that transform doesn't change values of columns.""" - ts = ts_with_regressors_and_features + ts = ts_with_regressors df_encoded = ts.to_pandas() selector = TreeFeatureSelectionTransform(model=model, top_k=top_k) df_selected = selector.fit_transform(ts).to_pandas() @@ -216,13 +216,13 @@ def test_warns_no_regressors(model, example_tsds): RandomForestRegressor(n_estimators=10, random_state=42), ExtraTreesRegressor(n_estimators=10, random_state=42), GradientBoostingRegressor(n_estimators=10, random_state=42), - CatBoostRegressor(iterations=700, random_state=42, silent=True, cat_features=["segment_code"]), + CatBoostRegressor(iterations=700, random_state=42, silent=True), ], ) -def test_sanity_selected(model, ts_with_regressors_and_features): +def test_sanity_selected(model, ts_with_regressors): """Check that transform correctly finds meaningful regressors.""" - ts = ts_with_regressors_and_features - selector = TreeFeatureSelectionTransform(model=model, top_k=8) + ts = ts_with_regressors + selector = TreeFeatureSelectionTransform(model=model, top_k=10) df_selected = selector.fit_transform(ts).to_pandas() features_columns = df_selected.columns.get_level_values("feature").unique() selected_regressors = [column for column in features_columns if column.startswith("regressor_")]
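With the series applied, each feature-selection transform exposes a default grid. A closing sketch that samples the Gale-Shapley grid (constructor arguments mirror the tests above; import paths follow the modules touched in patch 1):

    import optuna
    from sklearn.ensemble import RandomForestRegressor

    from etna.analysis import ModelRelevanceTable
    from etna.transforms.feature_selection.gale_shapley import GaleShapleyFeatureSelectionTransform

    transform = GaleShapleyFeatureSelectionTransform(
        relevance_table=ModelRelevanceTable(),
        top_k=3,
        use_rank=False,
        model=RandomForestRegressor(random_state=42),
    )

    # Two entries: "top_k" (IntUniformDistribution) and "use_rank" (CategoricalDistribution).
    grid = transform.params_to_tune()

    trial = optuna.create_study().ask()
    top_k = trial.suggest_int("top_k", grid["top_k"].low, grid["top_k"].high)  # 1..3 here
    use_rank = trial.suggest_categorical("use_rank", grid["use_rank"].choices)  # False or True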