From 54ec649e2a12674b9c21c0b0fd856853495ecad1 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 21 Jun 2024 14:16:37 +0200 Subject: [PATCH 01/30] KNN imputer implemenmted --- .../_k_nearest_neighbors_imputer.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py new file mode 100644 index 000000000..b19469135 --- /dev/null +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds._validation import _check_columns_exist +from safeds.data.tabular.containers import Table +from safeds.exceptions import TransformerNotFittedError + +from ._table_transformer import TableTransformer + +if TYPE_CHECKING: + from sklearn.impute import KNNImputer as sk_KNNImputer + +class KNearestNeighborsImputer(TableTransformer): + """ + The KNearestNeighborsImputer replaces missing values in a table with the mean value of the K-nearest neighbors. + + Parameters + ---------- + neighbor_count: + The number of neighbors to consider when imputing missing values. + column_names: + The list of columns used to impute missing values. If 'None', all columns are used. + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__( + self, + neighbor_count: int, + *, + column_names: str | list[str] | None = None, + value_to_replace: float | str | None = None, + ) -> None: + super().__init__(column_names) + + # parameter + self._neighbor_count: int = neighbor_count + self._value_to_replace: float | str | None = value_to_replace + + # attributes + self._wrapped_transformer: sk_KNNImputer | None = None + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def is_fitted(self) -> bool: + """Whether the transformer is fitted.""" + return self._wrapped_transformer is not None + + @property + def neighbor_count(self) -> int: + """The number of neighbors to consider when imputing missing values.""" + return self._neighbor_count + + # ------------------------------------------------------------------------------------------------------------------ + # Learning and transformation + # ------------------------------------------------------------------------------------------------------------------ + + def fit(self, table: Table) -> KNearestNeighborsImputer: + """ + Learn a trandformation for a set of columns in a table. + + This transformer is not modified. + + Parameters + ---------- + table: + The table used to fit the transformer. + + Returns + ------- + fitted_transformer: + The fitted transformer. + + Raises + ------ + ColumnNotFoundError + If one of the columns, that should be fitted is not in the table. + """ + from sklearn.impute import KNNImputer as sk_KNNImputer + + if table.row_count == 0: + raise ValueError("The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows.") + + if self._column_names is None: + self._column_names = table.column_names + else: + column_names = self._column_names + _check_columns_exist(Table, column_names) + + wrapped_transformer = sk_KNNImputer(missing_values=self._value_to_replace, n_neighbors=self._neighbor_count) + wrapped_transformer.set_output(transform="polars") + wrapped_transformer.fit( + table.remove_columns_except(column_names)._data_frame, + ) + + result = KNearestNeighborsImputer(self._neighbor_count, column_names=self._column_names, value_to_replace=self._value_to_replace) + result._wrapped_transformer = wrapped_transformer + + return result + + def transform(self, table: Table) -> Table: + """ + Apply the learned transformation to a table. + + The Table is not modified. + + Parameters + ---------- + table: + The table to wich the learned transformation is applied. + + Returns + ------- + transformed_table: + The transformed table. + + Raises + ------ + TransformerNotFittedError + If the transformer is not fitted. + ColumnNotFoundError + If one of the columns, that should be transformed is not in the table. + """ + if self._column_names is None or self._neighbor_count is None or self._wrapped_transformer is None: + raise TransformerNotFittedError + + _check_columns_exist(table, self._column_names) + + new_data = self._wrapped_transformer.transform( + table.remove_columns_except(self._column_names)._data_frame, + ) + + return Table._from_polars_lazy_frame( + table._lazy_frame.update(new_data.lazify()), + ) \ No newline at end of file From ba422013dab4b802bf5fc3326ff9081a1bf66b61 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 21 Jun 2024 14:20:09 +0200 Subject: [PATCH 02/30] modified __init__ --- src/safeds/data/tabular/transformation/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index b7f19d22e..25bd2eb94 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -7,6 +7,7 @@ if TYPE_CHECKING: from ._discretizer import Discretizer from ._invertible_table_transformer import InvertibleTableTransformer + from ._k_nearest_neighbors_imputer import KNearestNeighborsImputer from ._label_encoder import LabelEncoder from ._one_hot_encoder import OneHotEncoder from ._range_scaler import RangeScaler @@ -25,6 +26,7 @@ "SimpleImputer": "._simple_imputer:SimpleImputer", "StandardScaler": "._standard_scaler:StandardScaler", "TableTransformer": "._table_transformer:TableTransformer", + "KNearestNeighborsImputer": "._k_nearest_neighbors_imputer:KNearestNeighborsImputer", }, ) @@ -37,4 +39,5 @@ "SimpleImputer", "StandardScaler", "TableTransformer", + "KNearestNeighborsImputer", ] From e80a651f13eb56d3d2fb93be8ee40d82ed1e775f Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 21 Jun 2024 15:05:15 +0200 Subject: [PATCH 03/30] added tests and change a bit --- .../_k_nearest_neighbors_imputer.py | 18 +++-- .../test_k_nearest_neighbors_imputer.py | 77 +++++++++++++++++++ 2 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index b19469135..d419c15f9 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _structural_hash from safeds._validation import _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import TransformerNotFittedError @@ -29,13 +30,18 @@ class KNearestNeighborsImputer(TableTransformer): def __init__( self, - neighbor_count: int, + neighbor_count: int = 5, *, column_names: str | list[str] | None = None, value_to_replace: float | str | None = None, ) -> None: super().__init__(column_names) + if not isinstance(neighbor_count, int): + raise TypeError('Parameter "neighbor_count" must be a integer.') + if neighbor_count <= 0: + raise ValueError('Parameter "neighbor_count" must be greater than 0.') + # parameter self._neighbor_count: int = neighbor_count self._value_to_replace: float | str | None = value_to_replace @@ -43,6 +49,8 @@ def __init__( # attributes self._wrapped_transformer: sk_KNNImputer | None = None + def __hash__(self) -> int: + return _structural_hash(self) # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @@ -88,10 +96,10 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: raise ValueError("The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows.") if self._column_names is None: - self._column_names = table.column_names + column_names = table.column_names else: column_names = self._column_names - _check_columns_exist(Table, column_names) + _check_columns_exist(table, column_names) wrapped_transformer = sk_KNNImputer(missing_values=self._value_to_replace, n_neighbors=self._neighbor_count) wrapped_transformer.set_output(transform="polars") @@ -99,7 +107,7 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: table.remove_columns_except(column_names)._data_frame, ) - result = KNearestNeighborsImputer(self._neighbor_count, column_names=self._column_names, value_to_replace=self._value_to_replace) + result = KNearestNeighborsImputer(self._neighbor_count, column_names=column_names, value_to_replace=self._value_to_replace) result._wrapped_transformer = wrapped_transformer return result @@ -127,7 +135,7 @@ def transform(self, table: Table) -> Table: ColumnNotFoundError If one of the columns, that should be transformed is not in the table. """ - if self._column_names is None or self._neighbor_count is None or self._wrapped_transformer is None: + if self._column_names is None or self._wrapped_transformer is None: raise TransformerNotFittedError _check_columns_exist(table, self._column_names) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py new file mode 100644 index 000000000..55fc7a93c --- /dev/null +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -0,0 +1,77 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import KNearestNeighborsImputer +from safeds.exceptions import ColumnNotFoundError, TransformerNotFittedError + + +class TestInit: + def test_should_raise_value_error(self) -> None: + with pytest.raises(ValueError, match='Parameter "neighbor_count" must be greater than 0.'): + _ = KNearestNeighborsImputer(neighbor_count=0) + + def test_should_raise_type_error(self) -> None: + with pytest.raises(TypeError, match='Parameter "neighbor_count" must be a integer.'): + _ = KNearestNeighborsImputer(neighbor_count=1.5) + + +class TestFit: + def test_should_raise_if_column_not_found(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + with pytest.raises(ColumnNotFoundError): + KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) + + def test_should_raise_if_table_contains_no_rows(self) -> None: + with pytest.raises(ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows"): + KNearestNeighborsImputer().fit(Table({"col1": []})) + + def test_should_not_change_original_transformer(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = KNearestNeighborsImputer() + transformer.fit(table) + + assert transformer._column_names is None + assert transformer._wrapped_transformer is None + + +class TestTransform: + def test_should_raise_if_column_not_found(self) -> None: + table_to_fit = Table( + { + "col1": [0.0, 5.0, 10.0], + "col2": [5.0, 50.0, 100.0], + }, + ) + + transformer = KNearestNeighborsImputer() + transformer.fit(table_to_fit) + + table_to_transform = Table( + { + "col3": ["a", "b", "c"], + }, + ) + + with pytest.raises(ColumnNotFoundError): + transformer.transform(table_to_transform) + + def test_should_raise_if_not_fitted(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = KNearestNeighborsImputer() + + with pytest.raises(TransformerNotFittedError): + transformer.transform(table) From 36640fd23eaf0b570a4131a1e696754df45c6ae4 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 21 Jun 2024 16:03:43 +0200 Subject: [PATCH 04/30] more and better test --- .../transformation/test_k_nearest_neighbors_imputer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 55fc7a93c..be59a52d7 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -9,10 +9,6 @@ def test_should_raise_value_error(self) -> None: with pytest.raises(ValueError, match='Parameter "neighbor_count" must be greater than 0.'): _ = KNearestNeighborsImputer(neighbor_count=0) - def test_should_raise_type_error(self) -> None: - with pytest.raises(TypeError, match='Parameter "neighbor_count" must be a integer.'): - _ = KNearestNeighborsImputer(neighbor_count=1.5) - class TestFit: def test_should_raise_if_column_not_found(self) -> None: @@ -53,7 +49,6 @@ def test_should_raise_if_column_not_found(self) -> None: ) transformer = KNearestNeighborsImputer() - transformer.fit(table_to_fit) table_to_transform = Table( { @@ -62,7 +57,7 @@ def test_should_raise_if_column_not_found(self) -> None: ) with pytest.raises(ColumnNotFoundError): - transformer.transform(table_to_transform) + transformer.fit(table_to_fit).transform(table_to_transform) def test_should_raise_if_not_fitted(self) -> None: table = Table( From 6450e3de60259a5b4236cf5c9b8570b3e8119cb9 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 21 Jun 2024 16:03:55 +0200 Subject: [PATCH 05/30] removed typechecking for init --- .../tabular/transformation/_k_nearest_neighbors_imputer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index d419c15f9..653d5dfcf 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -37,8 +37,6 @@ def __init__( ) -> None: super().__init__(column_names) - if not isinstance(neighbor_count, int): - raise TypeError('Parameter "neighbor_count" must be a integer.') if neighbor_count <= 0: raise ValueError('Parameter "neighbor_count" must be greater than 0.') @@ -101,13 +99,13 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: column_names = self._column_names _check_columns_exist(table, column_names) - wrapped_transformer = sk_KNNImputer(missing_values=self._value_to_replace, n_neighbors=self._neighbor_count) + wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) wrapped_transformer.set_output(transform="polars") wrapped_transformer.fit( table.remove_columns_except(column_names)._data_frame, ) - result = KNearestNeighborsImputer(self._neighbor_count, column_names=column_names, value_to_replace=self._value_to_replace) + result = KNearestNeighborsImputer(self._neighbor_count, column_names=column_names) result._wrapped_transformer = wrapped_transformer return result From 83f7d925dae2b88b7a77bade0b20f6e691ad5e07 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 21 Jun 2024 16:33:52 +0200 Subject: [PATCH 06/30] end of day --- .../_k_nearest_neighbors_imputer.py | 5 +- .../test_k_nearest_neighbors_imputer.py | 65 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 653d5dfcf..3766ffd22 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -136,6 +136,9 @@ def transform(self, table: Table) -> Table: if self._column_names is None or self._wrapped_transformer is None: raise TransformerNotFittedError + if table.row_count == 0: + raise ValueError("The table cannot be transformed because it contains 0 rows") + _check_columns_exist(table, self._column_names) new_data = self._wrapped_transformer.transform( @@ -143,5 +146,5 @@ def transform(self, table: Table) -> Table: ) return Table._from_polars_lazy_frame( - table._lazy_frame.update(new_data.lazify()), + table._lazy_frame.with_columns(new_data), ) \ No newline at end of file diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index be59a52d7..07cdc8aea 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -70,3 +70,68 @@ def test_should_raise_if_not_fitted(self) -> None: with pytest.raises(TransformerNotFittedError): transformer.transform(table) + + +class TestIsFitted: + def test_should_return_false_before_fitting(self) -> None: + transformer = KNearestNeighborsImputer() + assert not transformer.is_fitted + + def test_should_return_true_after_fitting(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = KNearestNeighborsImputer() + fitted_transformer = transformer.fit(table) + assert fitted_transformer.is_fitted + + +class TestFitAndTransform: + @pytest.mark.parametrize( + ("table", "column_names", "expected"), + [ + ( + Table( + { + "col1": [0.0, 5.0, 5.0, 10.0], + }, + ), + None, + Table( + { + "col1": [0.0, 0.5, 0.5, 1.0], + }, + ), + ), + ( + Table( + { + "col1": [0.0, 5.0, 5.0, 10.0], + "col2": [0.0, 5.0, 5.0, 10.0], + }, + ), + ["col1"], + Table( + { + "col1": [0.0, 0.5, 0.5, 1.0], + "col2": [0.0, 5.0, 5.0, 10.0], + }, + ), + ), + ], + ids=["one_column", "two_columns"], + ) + def test_should_return_fitted_transformer_and_transformed_table( + self, + table: Table, + column_names: list[str] | None, + expected: Table, + ) -> None: + fitted_transformer, transformed_table = KNearestNeighborsImputer(column_names=column_names).fit_and_transform(table) + assert fitted_transformer.is_fitted + assert transformed_table == expected + + \ No newline at end of file From a8e9fd991a9c1c64a174cc9948d5ae7676a610af Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Tue, 25 Jun 2024 14:29:37 +0200 Subject: [PATCH 07/30] wrote all tests and everything working accordingly --- .../_k_nearest_neighbors_imputer.py | 4 +- .../test_k_nearest_neighbors_imputer.py | 86 ++++++++++++++++--- 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 3766ffd22..36790333b 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -14,7 +14,7 @@ class KNearestNeighborsImputer(TableTransformer): """ - The KNearestNeighborsImputer replaces missing values in a table with the mean value of the K-nearest neighbors. + The KNearestNeighborsImputer replaces missing values in a with the mean value of the K-nearest neighbors. Parameters ---------- @@ -98,6 +98,8 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: else: column_names = self._column_names _check_columns_exist(table, column_names) + + # TODO maybe add a check which tests if the value to replace is even in the tabel wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) wrapped_transformer.set_output(transform="polars") diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 07cdc8aea..11b5f6c9a 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import KNearestNeighborsImputer @@ -96,28 +97,30 @@ class TestFitAndTransform: ( Table( { - "col1": [0.0, 5.0, 5.0, 10.0], + "col1": [1, 2, np.nan], + "col2": [1, 2, 3], }, ), - None, + ["col1"], Table( { - "col1": [0.0, 0.5, 0.5, 1.0], + "col1": [1, 2, 2], # Assuming k=1, the nearest neighbor for the missing value is 2. + "col2": [1, 2, 3], }, ), ), ( Table( { - "col1": [0.0, 5.0, 5.0, 10.0], - "col2": [0.0, 5.0, 5.0, 10.0], + "col1": [1, 2, np.nan, 4], + "col2": [1, 2, 3, 4], }, ), ["col1"], Table( { - "col1": [0.0, 0.5, 0.5, 1.0], - "col2": [0.0, 5.0, 5.0, 10.0], + "col1": [1, 2, 2, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. + "col2": [1, 2, 3, 4], }, ), ), @@ -127,11 +130,74 @@ class TestFitAndTransform: def test_should_return_fitted_transformer_and_transformed_table( self, table: Table, - column_names: list[str] | None, + column_names: list[str] | None, # noqa: ARG002 + expected: Table, + ) -> None: + fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=1,column_names=None, value_to_replace=np.nan).fit_and_transform(table) + assert fitted_transformer.is_fitted + assert transformed_table == expected + + @pytest.mark.parametrize( + ("table", "column_names", "expected"), + [ + ( + Table( + { + "col1": [1, 2, np.nan], + "col2": [1, 2, 3], + }, + ), + ["col1"], + Table( + { + "col1": [1, 2, 3/2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 + "col2": [1, 2, 3], + }, + ), + ), + ( + Table( + { + "col1": [1, 2, np.nan, 4], + "col2": [1, np.nan, 3, 4], + }, + ), + ["col1"], + Table( + { + "col1": [1, 2, 7/3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. + "col2": [1, 8/3, 3, 4], + }, + ), + ), + ], + ids=["one_column", "two_columns"], + ) + + + def test_should_return_fitted_transformer_and_transformed_table_with_correct_range( + self, + table: Table, + column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer(column_names=column_names).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=3, value_to_replace=np.nan).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected - \ No newline at end of file + def test_should_not_change_original_table(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + KNearestNeighborsImputer().fit_and_transform(table) + + expected = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + assert table == expected \ No newline at end of file From aa69dce7f7aa26949d05f418a0aec2a00eedb5b4 Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Fri, 28 Jun 2024 10:08:58 +0200 Subject: [PATCH 08/30] renamed a test and removed a wrong todo --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 -- .../tabular/transformation/test_k_nearest_neighbors_imputer.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 36790333b..3d0a05d37 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -98,8 +98,6 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: else: column_names = self._column_names _check_columns_exist(table, column_names) - - # TODO maybe add a check which tests if the value to replace is even in the tabel wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) wrapped_transformer.set_output(transform="polars") diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 11b5f6c9a..76658ff73 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -175,7 +175,7 @@ def test_should_return_fitted_transformer_and_transformed_table( ) - def test_should_return_fitted_transformer_and_transformed_table_with_correct_range( + def test_should_return_fitted_transformer_and_transformed_table_with_correct_values( self, table: Table, column_names: list[str] | None, # noqa: ARG002 From 8dfb8c40ae202d06256c456c4aa069f5a886387b Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 08:13:35 +0000 Subject: [PATCH 09/30] style: apply automated linter fixes --- .../_k_nearest_neighbors_imputer.py | 24 +++++++++-------- .../test_k_nearest_neighbors_imputer.py | 26 +++++++++++-------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 3d0a05d37..59cf8672c 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -12,6 +12,7 @@ if TYPE_CHECKING: from sklearn.impute import KNNImputer as sk_KNNImputer + class KNearestNeighborsImputer(TableTransformer): """ The KNearestNeighborsImputer replaces missing values in a with the mean value of the K-nearest neighbors. @@ -21,7 +22,7 @@ class KNearestNeighborsImputer(TableTransformer): neighbor_count: The number of neighbors to consider when imputing missing values. column_names: - The list of columns used to impute missing values. If 'None', all columns are used. + The list of columns used to impute missing values. If 'None', all columns are used. """ # ------------------------------------------------------------------------------------------------------------------ @@ -29,11 +30,11 @@ class KNearestNeighborsImputer(TableTransformer): # ------------------------------------------------------------------------------------------------------------------ def __init__( - self, - neighbor_count: int = 5, - *, - column_names: str | list[str] | None = None, - value_to_replace: float | str | None = None, + self, + neighbor_count: int = 5, + *, + column_names: str | list[str] | None = None, + value_to_replace: float | str | None = None, ) -> None: super().__init__(column_names) @@ -49,6 +50,7 @@ def __init__( def __hash__(self) -> int: return _structural_hash(self) + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @@ -57,7 +59,7 @@ def __hash__(self) -> int: def is_fitted(self) -> bool: """Whether the transformer is fitted.""" return self._wrapped_transformer is not None - + @property def neighbor_count(self) -> int: """The number of neighbors to consider when imputing missing values.""" @@ -92,13 +94,13 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: if table.row_count == 0: raise ValueError("The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows.") - + if self._column_names is None: column_names = table.column_names else: column_names = self._column_names _check_columns_exist(table, column_names) - + wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) wrapped_transformer.set_output(transform="polars") wrapped_transformer.fit( @@ -109,7 +111,7 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: result._wrapped_transformer = wrapped_transformer return result - + def transform(self, table: Table) -> Table: """ Apply the learned transformation to a table. @@ -147,4 +149,4 @@ def transform(self, table: Table) -> Table: return Table._from_polars_lazy_frame( table._lazy_frame.with_columns(new_data), - ) \ No newline at end of file + ) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 76658ff73..5545b5ba5 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -20,10 +20,12 @@ def test_should_raise_if_column_not_found(self) -> None: ) with pytest.raises(ColumnNotFoundError): - KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) - + KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) + def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises(ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows"): + with pytest.raises( + ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", + ): KNearestNeighborsImputer().fit(Table({"col1": []})) def test_should_not_change_original_transformer(self) -> None: @@ -133,7 +135,9 @@ def test_should_return_fitted_transformer_and_transformed_table( column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=1,column_names=None, value_to_replace=np.nan).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer( + neighbor_count=1, column_names=None, value_to_replace=np.nan, + ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -150,7 +154,7 @@ def test_should_return_fitted_transformer_and_transformed_table( ["col1"], Table( { - "col1": [1, 2, 3/2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 + "col1": [1, 2, 3 / 2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 "col2": [1, 2, 3], }, ), @@ -165,23 +169,23 @@ def test_should_return_fitted_transformer_and_transformed_table( ["col1"], Table( { - "col1": [1, 2, 7/3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. - "col2": [1, 8/3, 3, 4], + "col1": [1, 2, 7 / 3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. + "col2": [1, 8 / 3, 3, 4], }, ), ), ], ids=["one_column", "two_columns"], ) - - def test_should_return_fitted_transformer_and_transformed_table_with_correct_values( self, table: Table, column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=3, value_to_replace=np.nan).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer( + neighbor_count=3, value_to_replace=np.nan, + ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -200,4 +204,4 @@ def test_should_not_change_original_table(self) -> None: }, ) - assert table == expected \ No newline at end of file + assert table == expected From d4375b75d5eaa8ec739413f8756c22fadf0399af Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 08:15:01 +0000 Subject: [PATCH 10/30] style: apply automated linter fixes --- .../transformation/test_k_nearest_neighbors_imputer.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 5545b5ba5..edfb9a4b0 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -24,7 +24,8 @@ def test_should_raise_if_column_not_found(self) -> None: def test_should_raise_if_table_contains_no_rows(self) -> None: with pytest.raises( - ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", + ValueError, + match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", ): KNearestNeighborsImputer().fit(Table({"col1": []})) @@ -136,7 +137,9 @@ def test_should_return_fitted_transformer_and_transformed_table( expected: Table, ) -> None: fitted_transformer, transformed_table = KNearestNeighborsImputer( - neighbor_count=1, column_names=None, value_to_replace=np.nan, + neighbor_count=1, + column_names=None, + value_to_replace=np.nan, ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -184,7 +187,8 @@ def test_should_return_fitted_transformer_and_transformed_table_with_correct_val expected: Table, ) -> None: fitted_transformer, transformed_table = KNearestNeighborsImputer( - neighbor_count=3, value_to_replace=np.nan, + neighbor_count=3, + value_to_replace=np.nan, ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected From 054fba2976456b6303464eb46f7f964dc708b23f Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Fri, 28 Jun 2024 10:42:02 +0200 Subject: [PATCH 11/30] how should we test the __hash__ function? --- .../test_k_nearest_neighbors_imputer.py | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index edfb9a4b0..c1f41aa44 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -10,6 +10,9 @@ def test_should_raise_value_error(self) -> None: with pytest.raises(ValueError, match='Parameter "neighbor_count" must be greater than 0.'): _ = KNearestNeighborsImputer(neighbor_count=0) + def test_neighbor_count(self) -> None: + knn = KNearestNeighborsImputer(neighbor_count=5) + assert knn.neighbor_count == 5 class TestFit: def test_should_raise_if_column_not_found(self) -> None: @@ -20,13 +23,10 @@ def test_should_raise_if_column_not_found(self) -> None: ) with pytest.raises(ColumnNotFoundError): - KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) - + KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) + def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises( - ValueError, - match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", - ): + with pytest.raises(ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows"): KNearestNeighborsImputer().fit(Table({"col1": []})) def test_should_not_change_original_transformer(self) -> None: @@ -136,11 +136,7 @@ def test_should_return_fitted_transformer_and_transformed_table( column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer( - neighbor_count=1, - column_names=None, - value_to_replace=np.nan, - ).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=1,column_names=None, value_to_replace=np.nan).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -157,7 +153,7 @@ def test_should_return_fitted_transformer_and_transformed_table( ["col1"], Table( { - "col1": [1, 2, 3 / 2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 + "col1": [1, 2, 3/2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 "col2": [1, 2, 3], }, ), @@ -172,24 +168,23 @@ def test_should_return_fitted_transformer_and_transformed_table( ["col1"], Table( { - "col1": [1, 2, 7 / 3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. - "col2": [1, 8 / 3, 3, 4], + "col1": [1, 2, 7/3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. + "col2": [1, 8/3, 3, 4], }, ), ), ], ids=["one_column", "two_columns"], ) + + def test_should_return_fitted_transformer_and_transformed_table_with_correct_values( self, table: Table, column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer( - neighbor_count=3, - value_to_replace=np.nan, - ).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=3, value_to_replace=np.nan).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -208,4 +203,4 @@ def test_should_not_change_original_table(self) -> None: }, ) - assert table == expected + assert table == expected \ No newline at end of file From 1c1bfa45da0e92cc290fd4b367517081094cfeac Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 08:43:34 +0000 Subject: [PATCH 12/30] style: apply automated linter fixes --- .../test_k_nearest_neighbors_imputer.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index c1f41aa44..c151fab32 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -14,6 +14,7 @@ def test_neighbor_count(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) assert knn.neighbor_count == 5 + class TestFit: def test_should_raise_if_column_not_found(self) -> None: table = Table( @@ -23,10 +24,12 @@ def test_should_raise_if_column_not_found(self) -> None: ) with pytest.raises(ColumnNotFoundError): - KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) - + KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) + def test_should_raise_if_table_contains_no_rows(self) -> None: - with pytest.raises(ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows"): + with pytest.raises( + ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", + ): KNearestNeighborsImputer().fit(Table({"col1": []})) def test_should_not_change_original_transformer(self) -> None: @@ -136,7 +139,9 @@ def test_should_return_fitted_transformer_and_transformed_table( column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=1,column_names=None, value_to_replace=np.nan).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer( + neighbor_count=1, column_names=None, value_to_replace=np.nan, + ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -153,7 +158,7 @@ def test_should_return_fitted_transformer_and_transformed_table( ["col1"], Table( { - "col1": [1, 2, 3/2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 + "col1": [1, 2, 3 / 2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 "col2": [1, 2, 3], }, ), @@ -168,23 +173,23 @@ def test_should_return_fitted_transformer_and_transformed_table( ["col1"], Table( { - "col1": [1, 2, 7/3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. - "col2": [1, 8/3, 3, 4], + "col1": [1, 2, 7 / 3, 4], # Assuming k=1, the nearest neighbor for the missing value is 2. + "col2": [1, 8 / 3, 3, 4], }, ), ), ], ids=["one_column", "two_columns"], ) - - def test_should_return_fitted_transformer_and_transformed_table_with_correct_values( self, table: Table, column_names: list[str] | None, # noqa: ARG002 expected: Table, ) -> None: - fitted_transformer, transformed_table = KNearestNeighborsImputer(neighbor_count=3, value_to_replace=np.nan).fit_and_transform(table) + fitted_transformer, transformed_table = KNearestNeighborsImputer( + neighbor_count=3, value_to_replace=np.nan, + ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -203,4 +208,4 @@ def test_should_not_change_original_table(self) -> None: }, ) - assert table == expected \ No newline at end of file + assert table == expected From 959a250c501cdcbe5ae0cfea15a970f0b18030fe Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 08:44:59 +0000 Subject: [PATCH 13/30] style: apply automated linter fixes --- .../transformation/test_k_nearest_neighbors_imputer.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index c151fab32..4b2aa07b6 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -28,7 +28,8 @@ def test_should_raise_if_column_not_found(self) -> None: def test_should_raise_if_table_contains_no_rows(self) -> None: with pytest.raises( - ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", + ValueError, + match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", ): KNearestNeighborsImputer().fit(Table({"col1": []})) @@ -140,7 +141,9 @@ def test_should_return_fitted_transformer_and_transformed_table( expected: Table, ) -> None: fitted_transformer, transformed_table = KNearestNeighborsImputer( - neighbor_count=1, column_names=None, value_to_replace=np.nan, + neighbor_count=1, + column_names=None, + value_to_replace=np.nan, ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -188,7 +191,8 @@ def test_should_return_fitted_transformer_and_transformed_table_with_correct_val expected: Table, ) -> None: fitted_transformer, transformed_table = KNearestNeighborsImputer( - neighbor_count=3, value_to_replace=np.nan, + neighbor_count=3, + value_to_replace=np.nan, ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected From f866aff5705eacb6ef44a1170561a986e6a6432c Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 28 Jun 2024 10:52:52 +0200 Subject: [PATCH 14/30] removed unreachable code --- .../tabular/transformation/_k_nearest_neighbors_imputer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 59cf8672c..a188ac486 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -138,9 +138,6 @@ def transform(self, table: Table) -> Table: if self._column_names is None or self._wrapped_transformer is None: raise TransformerNotFittedError - if table.row_count == 0: - raise ValueError("The table cannot be transformed because it contains 0 rows") - _check_columns_exist(table, self._column_names) new_data = self._wrapped_transformer.transform( From d568a1f47f75f3a1c0fa8c8ff772326ac8960561 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 28 Jun 2024 11:52:34 +0200 Subject: [PATCH 15/30] added missing word in Knn discription --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index a188ac486..473681041 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -15,7 +15,7 @@ class KNearestNeighborsImputer(TableTransformer): """ - The KNearestNeighborsImputer replaces missing values in a with the mean value of the K-nearest neighbors. + The KNearestNeighborsImputer replaces missing values in given Columns with the mean value of the K-nearest neighbors. Parameters ---------- From b704c034cf6cff88940c075dcf80f4bdce65eb36 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 28 Jun 2024 11:55:48 +0200 Subject: [PATCH 16/30] adjusted tests --- .../test_k_nearest_neighbors_imputer.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 4b2aa07b6..b9b34a5bc 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -151,21 +151,6 @@ def test_should_return_fitted_transformer_and_transformed_table( @pytest.mark.parametrize( ("table", "column_names", "expected"), [ - ( - Table( - { - "col1": [1, 2, np.nan], - "col2": [1, 2, 3], - }, - ), - ["col1"], - Table( - { - "col1": [1, 2, 3 / 2], # Assuming k=1, the nearest neighbor for the missing value is 1.5 - "col2": [1, 2, 3], - }, - ), - ), ( Table( { @@ -182,7 +167,7 @@ def test_should_return_fitted_transformer_and_transformed_table( ), ), ], - ids=["one_column", "two_columns"], + ids=["two_columns"], ) def test_should_return_fitted_transformer_and_transformed_table_with_correct_values( self, From 1412a12181397deab490544b2c55cc3e243280b3 Mon Sep 17 00:00:00 2001 From: Saman Hushalsadat Date: Fri, 28 Jun 2024 11:57:36 +0200 Subject: [PATCH 17/30] Update src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py Co-authored-by: Lars Reimann --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 473681041..7886f4c3d 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -31,7 +31,7 @@ class KNearestNeighborsImputer(TableTransformer): def __init__( self, - neighbor_count: int = 5, + neighbor_count: int, *, column_names: str | list[str] | None = None, value_to_replace: float | str | None = None, From f6f19747d6f62818ea25fb49a3f2202609151073 Mon Sep 17 00:00:00 2001 From: Saman Hushalsadat Date: Fri, 28 Jun 2024 11:57:52 +0200 Subject: [PATCH 18/30] Update src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py Co-authored-by: Lars Reimann --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 7886f4c3d..8702c1366 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -71,7 +71,7 @@ def neighbor_count(self) -> int: def fit(self, table: Table) -> KNearestNeighborsImputer: """ - Learn a trandformation for a set of columns in a table. + Learn a transformation for a set of columns in a table. This transformer is not modified. From 9e68c09b64ba83b8be5f40d856c1cdaa56b41076 Mon Sep 17 00:00:00 2001 From: Saman Hushalsadat Date: Fri, 28 Jun 2024 11:58:06 +0200 Subject: [PATCH 19/30] Update src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py Co-authored-by: Lars Reimann --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 8702c1366..62fcd7894 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -73,7 +73,7 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: """ Learn a transformation for a set of columns in a table. - This transformer is not modified. + **Note:** This transformer is not modified. Parameters ---------- From 7a5a454f7be3af213fd1bc27470e308f63c98051 Mon Sep 17 00:00:00 2001 From: Saman Hushalsadat Date: Fri, 28 Jun 2024 11:58:15 +0200 Subject: [PATCH 20/30] Update src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py Co-authored-by: Lars Reimann --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 62fcd7894..5e68172c6 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -116,7 +116,7 @@ def transform(self, table: Table) -> Table: """ Apply the learned transformation to a table. - The Table is not modified. + **Note:** The given table is not modified. Parameters ---------- From 6f5d4ed82b3ce559530fd28c1cf78694ab87746f Mon Sep 17 00:00:00 2001 From: Saman Hushalsadat Date: Fri, 28 Jun 2024 11:58:25 +0200 Subject: [PATCH 21/30] Update tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py Co-authored-by: Lars Reimann --- .../tabular/transformation/test_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index b9b34a5bc..76ce9f90a 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -8,7 +8,7 @@ class TestInit: def test_should_raise_value_error(self) -> None: with pytest.raises(ValueError, match='Parameter "neighbor_count" must be greater than 0.'): - _ = KNearestNeighborsImputer(neighbor_count=0) + KNearestNeighborsImputer(neighbor_count=0) def test_neighbor_count(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) From e4369372d83e8da0e71303288ad4f1e31795bd52 Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 28 Jun 2024 12:13:26 +0200 Subject: [PATCH 22/30] added '_check_bounds' implementation --- .../transformation/_k_nearest_neighbors_imputer.py | 5 ++--- .../transformation/test_k_nearest_neighbors_imputer.py | 8 ++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 5e68172c6..29071d764 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds._validation import _check_columns_exist +from safeds._validation import _check_bounds, _check_columns_exist, _ClosedBound from safeds.data.tabular.containers import Table from safeds.exceptions import TransformerNotFittedError @@ -38,8 +38,7 @@ def __init__( ) -> None: super().__init__(column_names) - if neighbor_count <= 0: - raise ValueError('Parameter "neighbor_count" must be greater than 0.') + _check_bounds(name="neighbor_count", actual=neighbor_count, lower_bound=_ClosedBound(1)) # parameter self._neighbor_count: int = neighbor_count diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 76ce9f90a..141f9d53e 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -2,12 +2,16 @@ import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import KNearestNeighborsImputer -from safeds.exceptions import ColumnNotFoundError, TransformerNotFittedError +from safeds.exceptions import ( + ColumnNotFoundError, + OutOfBoundsError, + TransformerNotFittedError, +) class TestInit: def test_should_raise_value_error(self) -> None: - with pytest.raises(ValueError, match='Parameter "neighbor_count" must be greater than 0.'): + with pytest.raises(OutOfBoundsError): KNearestNeighborsImputer(neighbor_count=0) def test_neighbor_count(self) -> None: From ef72b776b3658be2c823b27cd1e0ab309b2a33aa Mon Sep 17 00:00:00 2001 From: Saman Hushi Date: Fri, 28 Jun 2024 12:30:05 +0200 Subject: [PATCH 23/30] added neighbor_count to all tests --- .../test_k_nearest_neighbors_imputer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index 141f9d53e..abc1a450e 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -28,14 +28,14 @@ def test_should_raise_if_column_not_found(self) -> None: ) with pytest.raises(ColumnNotFoundError): - KNearestNeighborsImputer(column_names=["col2", "col3"]).fit(table) + KNearestNeighborsImputer(neighbor_count=5, column_names=["col2", "col3"]).fit(table) def test_should_raise_if_table_contains_no_rows(self) -> None: with pytest.raises( ValueError, match=r"The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows", ): - KNearestNeighborsImputer().fit(Table({"col1": []})) + KNearestNeighborsImputer(neighbor_count=5).fit(Table({"col1": []})) def test_should_not_change_original_transformer(self) -> None: table = Table( @@ -44,7 +44,7 @@ def test_should_not_change_original_transformer(self) -> None: }, ) - transformer = KNearestNeighborsImputer() + transformer = KNearestNeighborsImputer(neighbor_count=5) transformer.fit(table) assert transformer._column_names is None @@ -60,7 +60,7 @@ def test_should_raise_if_column_not_found(self) -> None: }, ) - transformer = KNearestNeighborsImputer() + transformer = KNearestNeighborsImputer(neighbor_count=5) table_to_transform = Table( { @@ -78,7 +78,7 @@ def test_should_raise_if_not_fitted(self) -> None: }, ) - transformer = KNearestNeighborsImputer() + transformer = KNearestNeighborsImputer(neighbor_count=5) with pytest.raises(TransformerNotFittedError): transformer.transform(table) @@ -86,7 +86,7 @@ def test_should_raise_if_not_fitted(self) -> None: class TestIsFitted: def test_should_return_false_before_fitting(self) -> None: - transformer = KNearestNeighborsImputer() + transformer = KNearestNeighborsImputer(neighbor_count=5) assert not transformer.is_fitted def test_should_return_true_after_fitting(self) -> None: @@ -96,7 +96,7 @@ def test_should_return_true_after_fitting(self) -> None: }, ) - transformer = KNearestNeighborsImputer() + transformer = KNearestNeighborsImputer(neighbor_count=5) fitted_transformer = transformer.fit(table) assert fitted_transformer.is_fitted @@ -193,7 +193,7 @@ def test_should_not_change_original_table(self) -> None: }, ) - KNearestNeighborsImputer().fit_and_transform(table) + KNearestNeighborsImputer(neighbor_count=5).fit_and_transform(table) expected = Table( { From c4caca765b9bb090f6a53900cdf38bf17445f847 Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Mon, 1 Jul 2024 12:52:02 +0200 Subject: [PATCH 24/30] should have 100% conver now and hashing implemented like in SimpleImputer --- .../_k_nearest_neighbors_imputer.py | 16 +++++++++++++--- .../test_k_nearest_neighbors_imputer.py | 14 ++++++-------- .../transformation/test_table_transformer.py | 3 +++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 29071d764..eca09dc20 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -2,6 +2,8 @@ from typing import TYPE_CHECKING +from numpy import nan + from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _check_columns_exist, _ClosedBound from safeds.data.tabular.containers import Table @@ -23,6 +25,8 @@ class KNearestNeighborsImputer(TableTransformer): The number of neighbors to consider when imputing missing values. column_names: The list of columns used to impute missing values. If 'None', all columns are used. + value_to_replace: + The placeholder for the missing values. All occurrences of`missing_values` will be imputed. """ # ------------------------------------------------------------------------------------------------------------------ @@ -34,7 +38,7 @@ def __init__( neighbor_count: int, *, column_names: str | list[str] | None = None, - value_to_replace: float | str | None = None, + value_to_replace: float | str | None = nan, ) -> None: super().__init__(column_names) @@ -42,13 +46,19 @@ def __init__( # parameter self._neighbor_count: int = neighbor_count - self._value_to_replace: float | str | None = value_to_replace + self._value_to_replace: float | str | None = value_to_replace if value_to_replace is not None else nan # attributes self._wrapped_transformer: sk_KNNImputer | None = None def __hash__(self) -> int: - return _structural_hash(self) + return _structural_hash( + super().__hash__(), + self._neighbor_count, + self._value_to_replace, + # Leave out the internal state for faster hashing + ) + # ------------------------------------------------------------------------------------------------------------------ # Properties diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index abc1a450e..e0c796877 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -17,8 +17,6 @@ def test_should_raise_value_error(self) -> None: def test_neighbor_count(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) assert knn.neighbor_count == 5 - - class TestFit: def test_should_raise_if_column_not_found(self) -> None: table = Table( @@ -108,7 +106,7 @@ class TestFitAndTransform: ( Table( { - "col1": [1, 2, np.nan], + "col1": [1, 2, None], "col2": [1, 2, 3], }, ), @@ -123,7 +121,7 @@ class TestFitAndTransform: ( Table( { - "col1": [1, 2, np.nan, 4], + "col1": [1, 2, None, 4], "col2": [1, 2, 3, 4], }, ), @@ -147,7 +145,7 @@ def test_should_return_fitted_transformer_and_transformed_table( fitted_transformer, transformed_table = KNearestNeighborsImputer( neighbor_count=1, column_names=None, - value_to_replace=np.nan, + value_to_replace=None, ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected @@ -158,8 +156,8 @@ def test_should_return_fitted_transformer_and_transformed_table( ( Table( { - "col1": [1, 2, np.nan, 4], - "col2": [1, np.nan, 3, 4], + "col1": [1, 2, None, 4], + "col2": [1, None, 3, 4], }, ), ["col1"], @@ -181,7 +179,7 @@ def test_should_return_fitted_transformer_and_transformed_table_with_correct_val ) -> None: fitted_transformer, transformed_table = KNearestNeighborsImputer( neighbor_count=3, - value_to_replace=np.nan, + value_to_replace=None, ).fit_and_transform(table) assert fitted_transformer.is_fitted assert transformed_table == expected diff --git a/tests/safeds/data/tabular/transformation/test_table_transformer.py b/tests/safeds/data/tabular/transformation/test_table_transformer.py index 83c374bd3..3724c73aa 100644 --- a/tests/safeds/data/tabular/transformation/test_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_table_transformer.py @@ -4,6 +4,7 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import ( Discretizer, + KNearestNeighborsImputer, LabelEncoder, OneHotEncoder, RangeScaler, @@ -67,6 +68,8 @@ def transformers() -> list[TableTransformer]: + transformers_non_numeric() + [ SimpleImputer(strategy=SimpleImputer.Strategy.mode()), + KNearestNeighborsImputer(neighbor_count=3,value_to_replace=None), + ] ) From 21f3d0c7c08ad7a32041eaba7c31b9098a3dd420 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 1 Jul 2024 10:53:38 +0000 Subject: [PATCH 25/30] style: apply automated linter fixes --- .../tabular/transformation/_k_nearest_neighbors_imputer.py | 3 +-- .../tabular/transformation/test_k_nearest_neighbors_imputer.py | 3 ++- .../data/tabular/transformation/test_table_transformer.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index eca09dc20..f77dee910 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -25,7 +25,7 @@ class KNearestNeighborsImputer(TableTransformer): The number of neighbors to consider when imputing missing values. column_names: The list of columns used to impute missing values. If 'None', all columns are used. - value_to_replace: + value_to_replace: The placeholder for the missing values. All occurrences of`missing_values` will be imputed. """ @@ -59,7 +59,6 @@ def __hash__(self) -> int: # Leave out the internal state for faster hashing ) - # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index e0c796877..e3652a46c 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -1,4 +1,3 @@ -import numpy as np import pytest from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import KNearestNeighborsImputer @@ -17,6 +16,8 @@ def test_should_raise_value_error(self) -> None: def test_neighbor_count(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) assert knn.neighbor_count == 5 + + class TestFit: def test_should_raise_if_column_not_found(self) -> None: table = Table( diff --git a/tests/safeds/data/tabular/transformation/test_table_transformer.py b/tests/safeds/data/tabular/transformation/test_table_transformer.py index 3724c73aa..d03b157e1 100644 --- a/tests/safeds/data/tabular/transformation/test_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_table_transformer.py @@ -68,8 +68,7 @@ def transformers() -> list[TableTransformer]: + transformers_non_numeric() + [ SimpleImputer(strategy=SimpleImputer.Strategy.mode()), - KNearestNeighborsImputer(neighbor_count=3,value_to_replace=None), - + KNearestNeighborsImputer(neighbor_count=3, value_to_replace=None), ] ) From 55d4a71869ed92e4a743462de20e573859321850 Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Tue, 2 Jul 2024 11:47:38 +0200 Subject: [PATCH 26/30] added property value_to_replace changed nan into fit and the import also --- .../transformation/_k_nearest_neighbors_imputer.py | 14 ++++++++++---- .../test_k_nearest_neighbors_imputer.py | 8 ++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index f77dee910..0ace888e6 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from numpy import nan - from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _check_columns_exist, _ClosedBound from safeds.data.tabular.containers import Table @@ -38,7 +36,7 @@ def __init__( neighbor_count: int, *, column_names: str | list[str] | None = None, - value_to_replace: float | str | None = nan, + value_to_replace: float | str | None = None, ) -> None: super().__init__(column_names) @@ -46,7 +44,7 @@ def __init__( # parameter self._neighbor_count: int = neighbor_count - self._value_to_replace: float | str | None = value_to_replace if value_to_replace is not None else nan + self._value_to_replace: float | str | None = value_to_replace # attributes self._wrapped_transformer: sk_KNNImputer | None = None @@ -73,6 +71,10 @@ def neighbor_count(self) -> int: """The number of neighbors to consider when imputing missing values.""" return self._neighbor_count + @property + def value_to_replace(self) -> float | str | None: + """The value to replace.""" + return self._value_to_replace # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation # ------------------------------------------------------------------------------------------------------------------ @@ -98,6 +100,7 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: ColumnNotFoundError If one of the columns, that should be fitted is not in the table. """ + from numpy import nan from sklearn.impute import KNNImputer as sk_KNNImputer if table.row_count == 0: @@ -108,6 +111,9 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: else: column_names = self._column_names _check_columns_exist(table, column_names) + + if self._value_to_replace is None: + self._value_to_replace = nan wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) wrapped_transformer.set_output(transform="polars") diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index e3652a46c..cdac29eb8 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -17,6 +17,14 @@ def test_neighbor_count(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) assert knn.neighbor_count == 5 + def test_value_to_replace_none(self)->None: + knn = KNearestNeighborsImputer(neighbor_count=5) + assert knn.value_to_replace is None + + def test_value_to_replace_number(self)->None: + knn = KNearestNeighborsImputer(neighbor_count=5, value_to_replace=1) + assert knn.value_to_replace == 1 + class TestFit: def test_should_raise_if_column_not_found(self) -> None: From 3b670cacd1a65603479dfba4d89491eb8c7a0c9e Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Tue, 2 Jul 2024 11:51:19 +0200 Subject: [PATCH 27/30] removed the import of nan into the if statement --- .../data/tabular/transformation/_k_nearest_neighbors_imputer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 0ace888e6..6a1d8d242 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -100,7 +100,6 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: ColumnNotFoundError If one of the columns, that should be fitted is not in the table. """ - from numpy import nan from sklearn.impute import KNNImputer as sk_KNNImputer if table.row_count == 0: @@ -113,6 +112,7 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: _check_columns_exist(table, column_names) if self._value_to_replace is None: + from numpy import nan self._value_to_replace = nan wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) From 3fc7a6214614230aa42f91a4e15badd95c0c8529 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 2 Jul 2024 09:52:58 +0000 Subject: [PATCH 28/30] style: apply automated linter fixes --- .../tabular/transformation/_k_nearest_neighbors_imputer.py | 4 +++- .../transformation/test_k_nearest_neighbors_imputer.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 6a1d8d242..3fc7b66ce 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -75,6 +75,7 @@ def neighbor_count(self) -> int: def value_to_replace(self) -> float | str | None: """The value to replace.""" return self._value_to_replace + # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation # ------------------------------------------------------------------------------------------------------------------ @@ -110,9 +111,10 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: else: column_names = self._column_names _check_columns_exist(table, column_names) - + if self._value_to_replace is None: from numpy import nan + self._value_to_replace = nan wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) diff --git a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py index cdac29eb8..de0700dbf 100644 --- a/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_k_nearest_neighbors_imputer.py @@ -17,11 +17,11 @@ def test_neighbor_count(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) assert knn.neighbor_count == 5 - def test_value_to_replace_none(self)->None: + def test_value_to_replace_none(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5) assert knn.value_to_replace is None - - def test_value_to_replace_number(self)->None: + + def test_value_to_replace_number(self) -> None: knn = KNearestNeighborsImputer(neighbor_count=5, value_to_replace=1) assert knn.value_to_replace == 1 From 489d329d0117629bf351dd1ab622d760ea142adf Mon Sep 17 00:00:00 2001 From: peplaul0 Date: Tue, 2 Jul 2024 12:17:53 +0200 Subject: [PATCH 29/30] now using var: value_to_replace for correct usage_ --- .../tabular/transformation/_k_nearest_neighbors_imputer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 3fc7b66ce..09a70bca2 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -112,12 +112,13 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: column_names = self._column_names _check_columns_exist(table, column_names) + value_to_replace = self._value_to_replace + if self._value_to_replace is None: from numpy import nan + value_to_replace = nan - self._value_to_replace = nan - - wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=self._value_to_replace) + wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=value_to_replace) wrapped_transformer.set_output(transform="polars") wrapped_transformer.fit( table.remove_columns_except(column_names)._data_frame, From c63833a6ab04c32a18ec43b21f4bab4d28483be1 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 2 Jul 2024 10:19:29 +0000 Subject: [PATCH 30/30] style: apply automated linter fixes --- .../tabular/transformation/_k_nearest_neighbors_imputer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py index 09a70bca2..890749ba4 100644 --- a/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py +++ b/src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py @@ -113,9 +113,10 @@ def fit(self, table: Table) -> KNearestNeighborsImputer: _check_columns_exist(table, column_names) value_to_replace = self._value_to_replace - + if self._value_to_replace is None: from numpy import nan + value_to_replace = nan wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=value_to_replace)