diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py
index 9e02405ae..5fa0773d5 100644
--- a/src/safeds/ml/classical/classification/_classifier.py
+++ b/src/safeds/ml/classical/classification/_classifier.py
@@ -1,12 +1,16 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING

 from sklearn.metrics import accuracy_score as sk_accuracy_score

 from safeds.data.tabular.containers import Table, TaggedTable
 from safeds.ml.exceptions import UntaggedTableError

+if TYPE_CHECKING:
+    from typing import Any
+

 class Classifier(ABC):
     """Abstract base class for all classifiers."""
@@ -100,7 +104,7 @@ def accuracy(self, validation_or_test_set: TaggedTable) -> float:

         return sk_accuracy_score(expected_values._data, predicted_values._data)

-    def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1) -> float:
+    def precision(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
         """
         Compute the classifier's precision on the given data.

@@ -108,7 +112,7 @@ def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1
         ----------
         validation_or_test_set : TaggedTable
             The validation or test set.
-        positive_class : int | str
+        positive_class : Any
             The class to be considered positive. All other classes are considered negative.

         Returns
@@ -136,3 +140,80 @@ def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1
         if (n_true_positives + n_false_positives) == 0:
             return 1.0
         return n_true_positives / (n_true_positives + n_false_positives)
+
+    def recall(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
+        """
+        Compute the classifier's recall on the given data.
+
+        Parameters
+        ----------
+        validation_or_test_set : TaggedTable
+            The validation or test set.
+        positive_class : Any
+            The class to be considered positive. All other classes are considered negative.
+
+        Returns
+        -------
+        recall : float
+            The calculated recall score, i.e. the ratio of correctly predicted positives to all expected positives.
+            Return 1 if there are no positive expectations.
+        """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
+
+        expected_values = validation_or_test_set.target
+        predicted_values = self.predict(validation_or_test_set.features).target
+
+        n_true_positives = 0
+        n_false_negatives = 0
+
+        for expected_value, predicted_value in zip(expected_values, predicted_values, strict=True):
+            if predicted_value == positive_class:
+                if expected_value == positive_class:
+                    n_true_positives += 1
+            elif expected_value == positive_class:
+                n_false_negatives += 1
+
+        if (n_true_positives + n_false_negatives) == 0:
+            return 1.0
+        return n_true_positives / (n_true_positives + n_false_negatives)
+
+    def f1_score(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
+        """
+        Compute the classifier's $F_1$-score on the given data.
+
+        Parameters
+        ----------
+        validation_or_test_set : TaggedTable
+            The validation or test set.
+        positive_class : Any
+            The class to be considered positive. All other classes are considered negative.
+
+        Returns
+        -------
+        f1_score : float
+            The calculated $F_1$-score, i.e. the harmonic mean between precision and recall.
+            Return 1 if there are neither positive expectations nor positive predictions.
+ """ + if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table): + raise UntaggedTableError + + expected_values = validation_or_test_set.target + predicted_values = self.predict(validation_or_test_set.features).target + + n_true_positives = 0 + n_false_negatives = 0 + n_false_positives = 0 + + for expected_value, predicted_value in zip(expected_values, predicted_values, strict=True): + if predicted_value == positive_class: + if expected_value == positive_class: + n_true_positives += 1 + else: + n_false_positives += 1 + elif expected_value == positive_class: + n_false_negatives += 1 + + if (2 * n_true_positives + n_false_positives + n_false_negatives) == 0: + return 1.0 + return 2 * n_true_positives / (2 * n_true_positives + n_false_positives + n_false_negatives) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 8ebfcb948..314531d9a 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -284,3 +284,101 @@ def test_should_return_1_if_never_expected_to_be_positive(self) -> None: def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None: with pytest.raises(UntaggedTableError): DummyClassifier().precision(table) # type: ignore[arg-type] + + +class TestRecall: + def test_should_compare_result(self) -> None: + table = Table.from_dict( + { + "predicted": [1, 1, 0, 2], + "expected": [1, 0, 1, 2], + }, + ).tag_columns(target_name="expected") + + assert DummyClassifier().recall(table, 1) == 0.5 + + def test_should_compare_result_with_different_types(self) -> None: + table = Table.from_dict( + { + "predicted": [1, "1", "0", "2"], + "expected": [1, 0, 1, 2], + }, + ).tag_columns(target_name="expected") + + assert DummyClassifier().recall(table, 1) == 0.5 + + def test_should_return_1_if_never_expected_to_be_positive(self) -> None: + table = Table.from_dict( + { + "predicted": ["lol", "1", "0", "2"], + "expected": [2, 0, 5, 2], + }, + ).tag_columns(target_name="expected") + + assert DummyClassifier().recall(table, 1) == 1.0 + + @pytest.mark.parametrize( + "table", + [ + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ], + ids=["untagged_table"], + ) + def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None: + with pytest.raises(UntaggedTableError): + DummyClassifier().recall(table) # type: ignore[arg-type] + + +class TestF1Score: + def test_should_compare_result(self) -> None: + table = Table.from_dict( + { + "predicted": [1, 1, 0, 2], + "expected": [1, 0, 1, 2], + }, + ).tag_columns(target_name="expected") + + assert DummyClassifier().f1_score(table, 1) == 0.5 + + def test_should_compare_result_with_different_types(self) -> None: + table = Table.from_dict( + { + "predicted": [1, "1", "0", "2"], + "expected": [1, 0, 1, 2], + }, + ).tag_columns(target_name="expected") + + assert DummyClassifier().f1_score(table, 1) == pytest.approx(0.6666667) + + def test_should_return_1_if_never_expected_or_predicted_to_be_positive(self) -> None: + table = Table.from_dict( + { + "predicted": ["lol", "1", "0", "2"], + "expected": [2, 0, 2, 2], + }, + ).tag_columns(target_name="expected") + + assert DummyClassifier().f1_score(table, 1) == 1.0 + + @pytest.mark.parametrize( + "table", + [ + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + 
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyClassifier().f1_score(table)  # type: ignore[arg-type]
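
Note on the $F_1$ formula: the docstring describes the harmonic mean of precision and recall, while the implementation computes `2 * n_true_positives / (2 * n_true_positives + n_false_positives + n_false_negatives)` in a single counting pass. These are the same quantity. With $P = \frac{TP}{TP + FP}$ and $R = \frac{TP}{TP + FN}$,

$F_1 = \frac{2PR}{P + R} = \frac{2\,TP}{2\,TP + FP + FN}$,

so the loop needs no separate precision and recall computations, and the single zero-division guard on $2\,TP + FP + FN$ covers both.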
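
A quick hand check of the values the new tests assert. The snippet below is a standalone sketch, not part of the patch; it recounts true/false positives and false negatives for the first `TestRecall`/`TestF1Score` table in the same way the loops added to `_classifier.py` do.

# Standalone sketch: recount the metrics for the table used in
# test_should_compare_result (positive class 1).
predicted = [1, 1, 0, 2]
expected = [1, 0, 1, 2]
positive_class = 1

pairs = list(zip(expected, predicted, strict=True))
n_true_positives = sum(1 for e, p in pairs if p == positive_class and e == positive_class)   # 1
n_false_positives = sum(1 for e, p in pairs if p == positive_class and e != positive_class)  # 1
n_false_negatives = sum(1 for e, p in pairs if p != positive_class and e == positive_class)  # 1

recall = n_true_positives / (n_true_positives + n_false_negatives)                           # 0.5
f1 = 2 * n_true_positives / (2 * n_true_positives + n_false_positives + n_false_negatives)   # 0.5

For the mixed-type table (`predicted` of `[1, "1", "0", "2"]` against `expected` of `[1, 0, 1, 2]`), only the first prediction compares equal to the integer `1`, so the counts become 1/0/1 and the scores are recall `0.5` and $F_1 = 2/3$, matching `pytest.approx(0.6666667)` in the test.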
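
The module already aliases `sklearn.metrics.accuracy_score`, so a cross-check against scikit-learn's own precision/recall/$F_1$ is easy to run locally. The snippet below is an illustration only, not something the patch adds, and it assumes the labels are first binarised against the chosen positive class (scikit-learn's binary metrics do not accept the mixed-type labels exercised by the tests).

# Illustration only (not part of the patch): compare the hand-rolled counts
# against scikit-learn after binarising labels with respect to positive_class.
from sklearn.metrics import f1_score, precision_score, recall_score

expected = [1, 0, 1, 2]
predicted = [1, 1, 0, 2]
positive_class = 1

y_true = [int(v == positive_class) for v in expected]   # [1, 0, 1, 0]
y_pred = [int(v == positive_class) for v in predicted]  # [1, 1, 0, 0]

assert precision_score(y_true, y_pred) == 0.5
assert recall_score(y_true, y_pred) == 0.5
assert f1_score(y_true, y_pred) == 0.5

One corner case differs: with no positive predictions or expectations, scikit-learn returns `0.0` by default (configurable via `zero_division`), whereas the new methods return `1.0`, which is exactly what the `*_return_1_*` tests pin down.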