Skip to content

Commit

Permalink
BUG Gale-Shapley (#1110)
Browse files Browse the repository at this point in the history
* BUG Gale-Shapley

* changelog

* black

* fix comment

---------

Co-authored-by: a.makhin <[email protected]>
  • Loading branch information
Ama16 and a.makhin authored Feb 14, 2023
1 parent da08758 commit 7b58e3c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
### Fixed

-
- Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
-

## [1.15.0] - 2023-01-31
Expand Down
2 changes: 1 addition & 1 deletion etna/transforms/feature_selection/gale_shapley.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def fit(self, df: pd.DataFrame) -> "GaleShapleyFeatureSelectionTransform":
segment_features_ranking=segment_features_ranking,
feature_segments_ranking=feature_segments_ranking,
)
if step == gale_shapley_steps_number - 1:
if step == gale_shapley_steps_number - 1 and last_step_features_number != 0:
selected_features = self._process_last_step(
matches=matches,
relevance_table=relevance_table,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,32 @@
from tests.test_transforms.utils import assert_transformation_equals_loaded_original


@pytest.fixture
def ts_with_exog_galeshapley(random_seed) -> TSDataset:
    """Build a two-segment daily dataset with four calendar exogenous columns.

    Each segment has 30 daily timestamps starting at 2020-01-15. Targets are
    drawn uniformly: ``segment_1`` from [10, 20) and ``segment_2`` from
    [-15, 5). The exogenous frame carries ``weekday``, ``monthday``, ``month``
    and ``year`` columns derived from the timestamp.
    """
    np.random.seed(random_seed)

    periods = 30
    # (segment name, low, high); the order matters because the draws share
    # one seeded random stream.
    segment_specs = (("segment_1", 10, 20), ("segment_2", -15, 5))
    segment_frames = []
    for segment_name, low, high in segment_specs:
        frame = pd.DataFrame({"timestamp": pd.date_range("2020-01-15", periods=periods)})
        frame["segment"] = segment_name
        frame["target"] = np.random.uniform(low, high, size=periods)
        segment_frames.append(frame)

    long_df = pd.concat(segment_frames).reset_index(drop=True)
    # Round-trip through TSDataset to obtain the flattened frame that the
    # exogenous data is derived from.
    flat_df = TSDataset(TSDataset.to_dataset(long_df), freq="D").to_pandas(flatten=True)

    exog_df = flat_df.copy().drop(columns=["target"])
    # Output column name -> attribute of the ``.dt`` accessor it is read from.
    calendar_columns = (("weekday", "weekday"), ("monthday", "day"), ("month", "month"), ("year", "year"))
    for column_name, dt_attribute in calendar_columns:
        exog_df[column_name] = getattr(exog_df["timestamp"].dt, dt_attribute)

    return TSDataset(df=TSDataset.to_dataset(flat_df), df_exog=TSDataset.to_dataset(exog_df), freq="D")


@pytest.fixture
def ts_with_large_regressors_number(random_seed) -> TSDataset:
df = generate_periodic_df(periods=100, start_time="2020-01-01", n_segments=3, period=7, scale=10)
Expand Down Expand Up @@ -622,3 +648,14 @@ def test_work_with_non_regressors(ts_with_exog):
)
def test_save_load(transform, ts_with_large_regressors_number):
assert_transformation_equals_loaded_original(transform=transform, ts=ts_with_large_regressors_number)


def test_right_number_features_with_integer_division(ts_with_exog_galeshapley):
    """Check the feature count left when ``top_k`` equals the number of segments.

    After fitting and transforming, the number of distinct names on the
    ``feature`` column level must be ``top_k + 1``.
    NOTE(review): the extra one is presumably the ``target`` column -- confirm.
    """
    n_segments = len(ts_with_exog_galeshapley.segments)
    selector = GaleShapleyFeatureSelectionTransform(relevance_table=StatisticsRelevanceTable(), top_k=n_segments)

    selector.fit(ts_with_exog_galeshapley.to_pandas())
    transformed = selector.transform(ts_with_exog_galeshapley.to_pandas())

    kept_features = transformed.columns.get_level_values("feature").unique().tolist()
    assert len(kept_features) == n_segments + 1

1 comment on commit 7b58e3c

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.