diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index e9190efb7..29d4aba23 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -1,13 +1,12 @@ """Classes for transforming tabular data.""" -from ._imputer import Imputer, ImputerStrategy +from ._imputer import Imputer from ._label_encoder import LabelEncoder from ._one_hot_encoder import OneHotEncoder from ._table_transformer import InvertibleTableTransformer, TableTransformer __all__ = [ "Imputer", - "ImputerStrategy", "LabelEncoder", "OneHotEncoder", "InvertibleTableTransformer", diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py index c5bc7edde..efa4adf09 100644 --- a/src/safeds/data/tabular/transformation/_imputer.py +++ b/src/safeds/data/tabular/transformation/_imputer.py @@ -1,6 +1,5 @@ from __future__ import annotations -from abc import ABC, abstractmethod from typing import Any import pandas as pd @@ -8,23 +7,32 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import TableTransformer +from safeds.data.tabular.typing import ImputerStrategy from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError -class ImputerStrategy(ABC): - @abstractmethod - def _augment_imputer(self, imputer: sk_SimpleImputer) -> None: - pass - - class Imputer(TableTransformer): """ - Impute the data for a given Table. + Replace missing values with the given strategy. Parameters ---------- strategy : ImputerStrategy - The strategy used to impute missing values. + The strategy used to impute missing values. Use the classes nested inside `Imputer.Strategy` to specify it. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column, Table + >>> from safeds.data.tabular.transformation import Imputer + >>> + >>> table = Table.from_columns( + ... [ + ... Column("a", [1, 3, None]), + ... Column("b", [None, 2, 3]), + ... ], + ... ) + >>> transformer = Imputer(Imputer.Strategy.Constant(0)) + >>> transformed_table = transformer.fit_and_transform(table) """ class Strategy: diff --git a/src/safeds/data/tabular/typing/__init__.py b/src/safeds/data/tabular/typing/__init__.py index ece061234..9a19c2b5d 100644 --- a/src/safeds/data/tabular/typing/__init__.py +++ b/src/safeds/data/tabular/typing/__init__.py @@ -1,12 +1,14 @@ """Types used to define the schema of a tabular dataset.""" from ._column_type import Anything, Boolean, ColumnType, Integer, RealNumber, String +from ._imputer_strategy import ImputerStrategy from ._schema import Schema __all__ = [ "Anything", "Boolean", "ColumnType", + "ImputerStrategy", "Integer", "RealNumber", "Schema", diff --git a/src/safeds/data/tabular/typing/_imputer_strategy.py b/src/safeds/data/tabular/typing/_imputer_strategy.py new file mode 100644 index 000000000..47315274c --- /dev/null +++ b/src/safeds/data/tabular/typing/_imputer_strategy.py @@ -0,0 +1,15 @@ +from abc import ABC, abstractmethod + +from sklearn.impute import SimpleImputer as sk_SimpleImputer + + +class ImputerStrategy(ABC): + """ + The abstract base class of the different imputation strategies supported by the `Imputer`. + + This class is only needed for type annotations. Use the subclasses nested inside `Imputer.Strategy` instead. + """ + + @abstractmethod + def _augment_imputer(self, imputer: sk_SimpleImputer) -> None: + pass diff --git a/tests/safeds/data/tabular/transformation/test_imputer.py b/tests/safeds/data/tabular/transformation/test_imputer.py index 5edd964c8..daf661246 100644 --- a/tests/safeds/data/tabular/transformation/test_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_imputer.py @@ -1,6 +1,7 @@ import pytest from safeds.data.tabular.containers import Column, Table -from safeds.data.tabular.transformation import Imputer, ImputerStrategy +from safeds.data.tabular.transformation import Imputer +from safeds.data.tabular.typing import ImputerStrategy from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError