From bf81468db907b1552d050f63f89ed37be7608cee Mon Sep 17 00:00:00 2001
From: Lars Reimann
Date: Sun, 5 May 2024 21:11:41 +0200
Subject: [PATCH 1/2] feat: `Regressor.summarize_metrics` and
 `Classifier.summarize_metrics`

---
 .../classical/classification/_classifier.py   | 38 ++++++++++++-
 .../ml/classical/regression/_regressor.py     | 56 ++++++++++++++-----
 .../classification/test_classifier.py         | 44 +++++++++++++++
 .../ml/classical/regression/test_regressor.py | 44 +++++++++++++++
 4 files changed, 168 insertions(+), 14 deletions(-)

diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py
index 1ad5792b3..c9a05cff3 100644
--- a/src/safeds/ml/classical/classification/_classifier.py
+++ b/src/safeds/ml/classical/classification/_classifier.py
@@ -92,7 +92,43 @@ def _get_sklearn_classifier(self) -> ClassifierMixin:
             The sklearn Classifier.
         """
 
-    # noinspection PyProtectedMember
+    # ------------------------------------------------------------------------------------------------------------------
+    # Metrics
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def summarize_metrics(self, validation_or_test_set: TabularDataset, positive_class: Any) -> Table:
+        """
+        Summarize the classifier's metrics on the given data.
+
+        Parameters
+        ----------
+        validation_or_test_set:
+            The validation or test set.
+        positive_class:
+            The class to be considered positive. All other classes are considered negative.
+
+        Returns
+        -------
+        metrics:
+            A table containing the classifier's metrics.
+
+        Raises
+        ------
+        TypeError
+            If a table is passed instead of a tabular dataset.
+        """
+        accuracy = self.accuracy(validation_or_test_set)
+        precision = self.precision(validation_or_test_set, positive_class)
+        recall = self.recall(validation_or_test_set, positive_class)
+        f1_score = self.f1_score(validation_or_test_set, positive_class)
+
+        return Table(
+            {
+                "metric": ["accuracy", "precision", "recall", "f1_score"],
+                "value": [accuracy, precision, recall, f1_score],
+            },
+        )
+
     def accuracy(self, validation_or_test_set: TabularDataset) -> float:
         """
         Compute the accuracy of the classifier on the given data.
diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py
index 618f68ce7..9d47f3267 100644
--- a/src/safeds/ml/classical/regression/_regressor.py
+++ b/src/safeds/ml/classical/regression/_regressor.py
@@ -90,10 +90,13 @@ def _get_sklearn_regressor(self) -> RegressorMixin:
             The sklearn Regressor.
         """
 
-    # noinspection PyProtectedMember
-    def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
+    # ------------------------------------------------------------------------------------------------------------------
+    # Metrics
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def summarize_metrics(self, validation_or_test_set: TabularDataset) -> Table:
         """
-        Compute the mean squared error (MSE) on the given data.
+        Summarize the regressor's metrics on the given data.
 
         Parameters
         ----------
@@ -102,15 +105,42 @@ def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
 
         Returns
         -------
-        mean_squared_error:
-            The calculated mean squared error (the average of the distance of each individual row squared).
+        metrics:
+            A table containing the regressor's metrics.
 
         Raises
         ------
         TypeError
             If a table is passed instead of a tabular dataset.
""" - from sklearn.metrics import mean_squared_error as sk_mean_squared_error + mean_absolute_error = self.mean_absolute_error(validation_or_test_set) + mean_squared_error = self.mean_squared_error(validation_or_test_set) + + return Table({ + "metric": ["mean_absolute_error", "mean_squared_error"], + "value": [mean_absolute_error, mean_squared_error], + }) + + def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float: + """ + Compute the mean absolute error (MAE) of the regressor on the given data. + + Parameters + ---------- + validation_or_test_set: + The validation or test set. + + Returns + ------- + mean_absolute_error: + The calculated mean absolute error (the average of the distance of each individual row). + + Raises + ------ + TypeError + If a table is passed instead of a tabular dataset. + """ + from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table): raise PlainTableError @@ -118,12 +148,12 @@ def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float: predicted = self.predict(validation_or_test_set.features).target _check_metrics_preconditions(predicted, expected) - return sk_mean_squared_error(expected._data, predicted._data) + return sk_mean_absolute_error(expected._data, predicted._data) # noinspection PyProtectedMember - def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float: + def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float: """ - Compute the mean absolute error (MAE) of the regressor on the given data. + Compute the mean squared error (MSE) on the given data. Parameters ---------- @@ -132,15 +162,15 @@ def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float: Returns ------- - mean_absolute_error: - The calculated mean absolute error (the average of the distance of each individual row). + mean_squared_error: + The calculated mean squared error (the average of the distance of each individual row squared). Raises ------ TypeError If a table is passed instead of a tabular dataset. 
""" - from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error + from sklearn.metrics import mean_squared_error as sk_mean_squared_error if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table): raise PlainTableError @@ -148,7 +178,7 @@ def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float: predicted = self.predict(validation_or_test_set.features).target _check_metrics_preconditions(predicted, expected) - return sk_mean_absolute_error(expected._data, predicted._data) + return sk_mean_squared_error(expected._data, predicted._data) # noinspection PyProtectedMember diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 5cba32cc7..5e4b06ac4 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -336,6 +336,50 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: pass +class TestSummarizeMetrics: + @pytest.mark.parametrize( + ("predicted", "expected", "result"), + [ + ( + [1, 2], + [1, 2], + Table({ + "metric": ["accuracy", "precision", "recall", "f1_score"], + "value": [1.0, 1.0, 1.0, 1.0], + }), + ), + ], + ) + def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None: + table = Table( + { + "predicted": predicted, + "expected": expected, + }, + ).to_tabular_dataset( + target_name="expected", + ) + + assert DummyClassifier().summarize_metrics(table, 1) == result + + @pytest.mark.parametrize( + "table", + [ + Table( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ], + ids=["table"], + ) + def test_should_raise_if_given_normal_table(self, table: Table) -> None: + with pytest.raises(PlainTableError): + DummyClassifier().summarize_metrics(table, 1) # type: ignore[arg-type] + + class TestAccuracy: def test_with_same_type(self) -> None: table = Table( diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 1a073883f..e290570ea 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -343,6 +343,50 @@ def _get_sklearn_regressor(self) -> RegressorMixin: pass +class TestSummarizeMetrics: + @pytest.mark.parametrize( + ("predicted", "expected", "result"), + [ + ( + [1, 2], + [1, 2], + Table({ + "metric": ["mean_absolute_error", "mean_squared_error"], + "value": [0.0, 0.0], + }), + ), + ], + ) + def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None: + table = Table( + { + "predicted": predicted, + "expected": expected, + }, + ).to_tabular_dataset( + target_name="expected", + ) + + assert DummyRegressor().summarize_metrics(table) == result + + @pytest.mark.parametrize( + "table", + [ + Table( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ], + ids=["table"], + ) + def test_should_raise_if_given_normal_table(self, table: Table) -> None: + with pytest.raises(PlainTableError): + DummyRegressor().summarize_metrics(table) # type: ignore[arg-type] + + class TestMeanAbsoluteError: @pytest.mark.parametrize( ("predicted", "expected", "result"), From 9f41b2921670c1673c5be43c80cdad1d0b7cfbcc Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 5 May 2024 19:16:03 
+0000 Subject: [PATCH 2/2] style: apply automated linter fixes --- src/safeds/ml/classical/regression/_regressor.py | 10 ++++++---- .../ml/classical/classification/test_classifier.py | 10 ++++++---- tests/safeds/ml/classical/regression/test_regressor.py | 10 ++++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 9d47f3267..1779bbb0e 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -116,10 +116,12 @@ def summarize_metrics(self, validation_or_test_set: TabularDataset) -> Table: mean_absolute_error = self.mean_absolute_error(validation_or_test_set) mean_squared_error = self.mean_squared_error(validation_or_test_set) - return Table({ - "metric": ["mean_absolute_error", "mean_squared_error"], - "value": [mean_absolute_error, mean_squared_error], - }) + return Table( + { + "metric": ["mean_absolute_error", "mean_squared_error"], + "value": [mean_absolute_error, mean_squared_error], + }, + ) def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float: """ diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 5e4b06ac4..4eb86da73 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -343,10 +343,12 @@ class TestSummarizeMetrics: ( [1, 2], [1, 2], - Table({ - "metric": ["accuracy", "precision", "recall", "f1_score"], - "value": [1.0, 1.0, 1.0, 1.0], - }), + Table( + { + "metric": ["accuracy", "precision", "recall", "f1_score"], + "value": [1.0, 1.0, 1.0, 1.0], + }, + ), ), ], ) diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index e290570ea..90af36e63 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -350,10 +350,12 @@ class TestSummarizeMetrics: ( [1, 2], [1, 2], - Table({ - "metric": ["mean_absolute_error", "mean_squared_error"], - "value": [0.0, 0.0], - }), + Table( + { + "metric": ["mean_absolute_error", "mean_squared_error"], + "value": [0.0, 0.0], + }, + ), ), ], )
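
---

For reference, a minimal usage sketch of the API this patch adds. `Table`, `to_tabular_dataset`, `summarize_metrics`, and the `positive_class` parameter are taken verbatim from the diffs above; `DecisionTreeClassifier` and its `fit` method are assumed from Safe-DS's public API at the time of this change and may differ between versions.

```python
# Usage sketch for the new Classifier.summarize_metrics (not part of the patch).
# Assumption: DecisionTreeClassifier and fit exist as in Safe-DS's public API;
# everything else mirrors the patch's own test code.
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

# Build tiny training and validation sets, as in the patch's tests.
training_set = Table(
    {
        "feature": [0, 1, 0, 1],
        "target": [0, 1, 0, 1],
    },
).to_tabular_dataset(target_name="target")

validation_set = Table(
    {
        "feature": [0, 1],
        "target": [0, 1],
    },
).to_tabular_dataset(target_name="target")

fitted = DecisionTreeClassifier().fit(training_set)

# One call now returns accuracy, precision, recall, and f1_score as a Table,
# replacing four separate metric calls; positive_class=1 tells
# precision/recall/f1 which label to treat as positive.
metrics = fitted.summarize_metrics(validation_set, positive_class=1)
print(metrics)
# Expected shape (values depend on the fit):
#   metric    | value
#   accuracy  | ...
#   precision | ...
#   recall    | ...
#   f1_score  | ...
```

The regressor variant is the same call without `positive_class` and reports `mean_absolute_error` and `mean_squared_error` rows instead.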