Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: hyperparameter optimization for RNNs and CNNs #923

Merged
merged 27 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e1608c1
some changes
sibre28 Jul 30, 2024
3f29393
make _get_best_rnn_model work
sibre28 Aug 5, 2024
6601d14
add column of columns support for classification_metrics
sibre28 Aug 9, 2024
fe6b0f7
update branch
sibre28 Aug 9, 2024
0a00974
add cnn hyperparameter optimization (for ImageToColumn and ImageToTable)
sibre28 Aug 16, 2024
c811399
do some stuff here and there
sibre28 Aug 16, 2024
eaf02cb
updates
sibre28 Aug 16, 2024
96ca5f3
Merge branch 'main' into 912-hyperparamteroptimization-for-rnns-and-cnns
sibre28 Aug 16, 2024
51072a7
linter stuff
sibre28 Aug 16, 2024
f1018e2
linter stuff
sibre28 Aug 16, 2024
61dabd6
Merge remote-tracking branch 'origin/912-hyperparamteroptimization-fo…
sibre28 Aug 16, 2024
03c74ea
add tests for fit_by_exhaustive_search for rnns and cnns
sibre28 Aug 16, 2024
753fba5
linter fixes and add tests
sibre28 Aug 19, 2024
78c7180
linter fixes
sibre28 Aug 19, 2024
3a4233a
linter fixes
sibre28 Aug 19, 2024
897e17a
linter fixes
sibre28 Aug 19, 2024
69aa8e6
remove classification metric functionality for continuous time series…
sibre28 Aug 19, 2024
7f23c69
small update
sibre28 Aug 19, 2024
a360281
linter
sibre28 Aug 19, 2024
bb9263b
linter
sibre28 Aug 19, 2024
7458e75
style: apply automated linter fixes
megalinter-bot Aug 19, 2024
7b66cdd
linter
sibre28 Aug 19, 2024
2bd526e
style: apply automated linter fixes
megalinter-bot Aug 19, 2024
3505277
linter
sibre28 Aug 19, 2024
c6568a1
style: apply automated linter fixes
megalinter-bot Aug 19, 2024
b588b53
linter
sibre28 Aug 19, 2024
76b78ca
Merge remote-tracking branch 'origin/912-hyperparamteroptimization-fo…
sibre28 Aug 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def _into_dataloader_with_window(
batch_size:
The size of data batches that should be loaded at one time.
continuous:
Whether or not to continue the forecast in the steps before forecast horizon.
Whether to continue the forecast in the steps before forecast horizon.

Raises
------
Expand Down
42 changes: 29 additions & 13 deletions src/safeds/ml/metrics/_classification_metrics.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any
from typing import Any

from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table
from safeds.data.labeled.containers import TabularDataset, TimeSeriesDataset
from safeds.data.tabular.containers import Column, Table
from safeds.exceptions import ColumnLengthMismatchError

if TYPE_CHECKING:
from safeds.data.tabular.containers import Column


class ClassificationMetrics(ABC):
"""A collection of classification metrics."""
Expand All @@ -18,7 +15,11 @@ class ClassificationMetrics(ABC):
def __init__(self) -> None: ...

@staticmethod
def summarize(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> Table:
def summarize(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> Table:
"""
Summarize classification metrics on the given data.

Expand Down Expand Up @@ -53,7 +54,10 @@ def summarize(predicted: Column | TabularDataset, expected: Column | TabularData
)

@staticmethod
def accuracy(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def accuracy(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the accuracy on the given data.

Expand Down Expand Up @@ -87,7 +91,11 @@ def accuracy(predicted: Column | TabularDataset, expected: Column | TabularDatas
return 0.0 # Types are not compatible, so no prediction can be correct

@staticmethod
def f1_score(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> float:
def f1_score(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> float:
"""
Compute the F₁ score on the given data.

Expand Down Expand Up @@ -122,7 +130,11 @@ def f1_score(predicted: Column | TabularDataset, expected: Column | TabularDatas
return 2 * true_positives / (2 * true_positives + false_positives + false_negatives)

@staticmethod
def precision(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> float:
def precision(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> float:
"""
Compute the precision on the given data.

Expand Down Expand Up @@ -156,7 +168,11 @@ def precision(predicted: Column | TabularDataset, expected: Column | TabularData
return true_positives / predicted_positives

@staticmethod
def recall(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> float:
def recall(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> float:
"""
Compute the recall on the given data.

Expand Down Expand Up @@ -190,9 +206,9 @@ def recall(predicted: Column | TabularDataset, expected: Column | TabularDataset
return true_positives / actual_positives


def _extract_target(column_or_dataset: Column | TabularDataset | TimeSeriesDataset) -> Column:
    """Extract the target column from the given column or dataset.

    A bare ``Column`` is passed through unchanged; for a dataset the
    ``target`` column is pulled out.
    """
    if isinstance(column_or_dataset, (TabularDataset, TimeSeriesDataset)):
        return column_or_dataset.target
    return column_or_dataset
Expand Down
92 changes: 83 additions & 9 deletions src/safeds/ml/metrics/_regression_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from abc import ABC, abstractmethod

from safeds.data.labeled.containers import TabularDataset
from safeds.data.labeled.containers import TabularDataset, TimeSeriesDataset
from safeds.data.tabular.containers import Column, Table
from safeds.exceptions import ColumnLengthMismatchError

Expand All @@ -14,7 +14,10 @@ class RegressionMetrics(ABC):
def __init__(self) -> None: ...

@staticmethod
def summarize(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> Table:
def summarize(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> Table:
"""
Summarize regression metrics on the given data.

Expand Down Expand Up @@ -57,7 +60,10 @@ def summarize(predicted: Column | TabularDataset, expected: Column | TabularData
)

@staticmethod
def coefficient_of_determination(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def coefficient_of_determination(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the coefficient of determination (R²) on the given data.

Expand Down Expand Up @@ -92,6 +98,20 @@ def coefficient_of_determination(predicted: Column | TabularDataset, expected: C
predicted = _extract_target(predicted)
_check_equal_length(predicted, expected)

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_coefficient_of_determination = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_coefficient_of_determination += RegressionMetrics.coefficient_of_determination(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_coefficient_of_determination / expected.row_count

residual_sum_of_squares = (expected._series - predicted._series).pow(2).sum()
total_sum_of_squares = (expected._series - expected._series.mean()).pow(2).sum()

Expand All @@ -104,7 +124,10 @@ def coefficient_of_determination(predicted: Column | TabularDataset, expected: C
return 1 - residual_sum_of_squares / total_sum_of_squares

@staticmethod
def mean_absolute_error(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def mean_absolute_error(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the mean absolute error (MAE) on the given data.

Expand All @@ -131,10 +154,27 @@ def mean_absolute_error(predicted: Column | TabularDataset, expected: Column | T
if expected.row_count == 0:
return 0.0 # Everything was predicted correctly (since there is nothing to predict)

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_mean_absolute_errors = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_mean_absolute_errors += RegressionMetrics.mean_absolute_error(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_mean_absolute_errors / expected.row_count

return (expected._series - predicted._series).abs().mean()

@staticmethod
def mean_directional_accuracy(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def mean_directional_accuracy(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the mean directional accuracy (MDA) on the given data.

Expand Down Expand Up @@ -172,7 +212,10 @@ def mean_directional_accuracy(predicted: Column | TabularDataset, expected: Colu
return predicted_directions.eq(expected_directions).mean()

@staticmethod
def mean_squared_error(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def mean_squared_error(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the mean squared error (MSE) on the given data.

Expand Down Expand Up @@ -201,10 +244,27 @@ def mean_squared_error(predicted: Column | TabularDataset, expected: Column | Ta
if expected.row_count == 0:
return 0.0 # Everything was predicted correctly (since there is nothing to predict)

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_mean_squared_errors = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_mean_squared_errors += RegressionMetrics.mean_squared_error(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_mean_squared_errors / expected.row_count

return (expected._series - predicted._series).pow(2).mean()

@staticmethod
def median_absolute_deviation(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def median_absolute_deviation(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the median absolute deviation (MAD) on the given data.

Expand All @@ -231,12 +291,26 @@ def median_absolute_deviation(predicted: Column | TabularDataset, expected: Colu
if expected.row_count == 0:
return 0.0

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_median_absolute_deviation = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_median_absolute_deviation += RegressionMetrics.median_absolute_deviation(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_median_absolute_deviation / expected.row_count

return (expected._series - predicted._series).abs().median()


def _extract_target(column_or_dataset: Column | TabularDataset | TimeSeriesDataset) -> Column:
    """Extract the target column from the given column or dataset.

    Datasets expose their prediction target via ``target``; a plain
    ``Column`` already is the target and is returned as-is.
    """
    is_dataset = isinstance(column_or_dataset, (TabularDataset, TimeSeriesDataset))
    return column_or_dataset.target if is_dataset else column_or_dataset
Expand Down
Loading