Skip to content

Commit

Permalink
feat: Added parameter maximum_number_of_learners and learner to `A…
Browse files Browse the repository at this point in the history
…daBoost` (#269)

Closes #171 and #173.

### Summary of Changes

feat: Added parameter `maximum_number_of_learners` to `AdaBoost`
feat: Added parameter `learner` to `AdaBoost`
feat: Added private abstract Method `_get_scikit_learner` in
`Classifier` and `Regressor`
refactor: Improved error messages in `classification` and `regression`
docs: Improved docstrings for raised `ValueErrors` in `classification`
and `regression`

---------

Co-authored-by: Alexander Gréus <[email protected]>
Co-authored-by: megalinter-bot <[email protected]>
Co-authored-by: Lars Reimann <[email protected]>
  • Loading branch information
4 people authored May 5, 2023
1 parent 2cf93cc commit bb5a07e
Show file tree
Hide file tree
Showing 34 changed files with 454 additions and 108 deletions.
49 changes: 43 additions & 6 deletions src/safeds/ml/classical/classification/_ada_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,47 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


class AdaBoost(Classifier):
"""Ada Boost classification.
"""
Ada Boost classification.
Parameters
----------
learner: Classifier
The learner from which the boosted ensemble is built.
maximum_number_of_learners: int
The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure
is stopped early. Has to be greater than 0.
learning_rate : float
Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution
of each classifier. Has to be greater than 0.
Raises
------
ValueError
If the learning rate is less than or equal to 0.
If `maximum_number_of_learners` or `learning_rate` are less than or equal to 0
"""

def __init__(self, learning_rate: float = 1.0) -> None:
def __init__(
self,
learner: Classifier | None = None,
maximum_number_of_learners: int = 50,
learning_rate: float = 1.0,
) -> None:
# Validation
if maximum_number_of_learners <= 0:
raise ValueError("The parameter 'maximum_number_of_learners' has to be greater than 0.")
if learning_rate <= 0:
raise ValueError("The learning rate has to be greater than 0.")
raise ValueError("The parameter 'learning_rate' has to be greater than 0.")

# Hyperparameters
self._learner = learner
self._maximum_number_of_learners = maximum_number_of_learners
self._learning_rate = learning_rate

# Internal state
Expand Down Expand Up @@ -61,10 +78,14 @@ def fit(self, training_set: TaggedTable) -> AdaBoost:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_AdaBoostClassifier(learning_rate=self._learning_rate)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = AdaBoost(learning_rate=self._learning_rate)
result = AdaBoost(
learner=self._learner,
maximum_number_of_learners=self._maximum_number_of_learners,
learning_rate=self._learning_rate,
)
result._wrapped_classifier = wrapped_classifier
result._feature_names = training_set.features.column_names
result._target_name = training_set.target.name
Expand Down Expand Up @@ -108,3 +129,19 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # Unwrap the inner learner first (if one was given), since sklearn expects
    # its own estimator type rather than our Classifier wrapper.
    if self._learner is None:
        base_estimator = None
    else:
        base_estimator = self._learner._get_sklearn_classifier()
    return sk_AdaBoostClassifier(
        estimator=base_estimator,
        n_estimators=self._maximum_number_of_learners,
        learning_rate=self._learning_rate,
    )
13 changes: 13 additions & 0 deletions src/safeds/ml/classical/classification/_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
if TYPE_CHECKING:
from typing import Any

from sklearn.base import ClassifierMixin


class Classifier(ABC):
"""Abstract base class for all classifiers."""
Expand Down Expand Up @@ -76,6 +78,17 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""

@abstractmethod
def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """

# noinspection PyProtectedMember
def accuracy(self, validation_or_test_set: TaggedTable) -> float:
"""
Expand Down
7 changes: 6 additions & 1 deletion src/safeds/ml/classical/classification/_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand Down Expand Up @@ -42,7 +44,7 @@ def fit(self, training_set: TaggedTable) -> DecisionTree:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_DecisionTreeClassifier()
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = DecisionTree()
Expand Down Expand Up @@ -89,3 +91,6 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    return sk_DecisionTreeClassifier()
24 changes: 17 additions & 7 deletions src/safeds/ml/classical/classification/_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -28,15 +30,15 @@ class GradientBoosting(Classifier):
Raises
------
ValueError
If `learning_rate` is non-positive or the `number_of_trees` is less than or equal to 0.
If `number_of_trees` is less than or equal to 0 or `learning_rate` is non-positive.
"""

def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
# Validation
if number_of_trees <= 0:
raise ValueError("The number of boosting stages to perform has to be greater than 0.")
raise ValueError("The parameter 'number_of_trees' has to be greater than 0.")
if learning_rate <= 0:
raise ValueError("The learning rate has to be greater than 0.")
raise ValueError("The parameter 'learning_rate' has to be greater than 0.")

# Hyperparameters
self._number_of_trees = number_of_trees
Expand Down Expand Up @@ -68,10 +70,7 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_GradientBoostingClassifier(
n_estimators=self._number_of_trees,
learning_rate=self._learning_rate,
)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
Expand Down Expand Up @@ -118,3 +117,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    return sk_GradientBoostingClassifier(
        n_estimators=self._number_of_trees,
        learning_rate=self._learning_rate,
    )
26 changes: 19 additions & 7 deletions src/safeds/ml/classical/classification/_k_nearest_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -25,13 +27,13 @@ class KNearestNeighbors(Classifier):
Raises
------
ValueError
If the number of neighbors is less than or equal to 0.
If `number_of_neighbors` is less than or equal to 0.
"""

def __init__(self, number_of_neighbors: int) -> None:
# Validation
if number_of_neighbors <= 0:
raise ValueError("The number of neighbors has to be greater than 0.")
raise ValueError("The parameter 'number_of_neighbors' has to be greater than 0.")

# Hyperparameters
self._number_of_neighbors = number_of_neighbors
Expand Down Expand Up @@ -60,19 +62,18 @@ def fit(self, training_set: TaggedTable) -> KNearestNeighbors:
Raises
------
ValueError
If the number of neighbors is greater than the sample size.
If `number_of_neighbors` is greater than the sample size.
LearningError
If the training data contains invalid values or if the training failed.
"""
if self._number_of_neighbors > training_set.number_of_rows:
raise ValueError(
(
f"The number of neighbors ({self._number_of_neighbors}) has to be less than or equal to the sample "
f"size ({training_set.number_of_rows})."
f"The parameter 'number_of_neighbors' ({self._number_of_neighbors}) has to be less than or equal to"
f" the sample size ({training_set.number_of_rows})."
),
)

wrapped_classifier = sk_KNeighborsClassifier(self._number_of_neighbors, n_jobs=-1)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = KNearestNeighbors(self._number_of_neighbors)
Expand Down Expand Up @@ -119,3 +120,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    number_of_neighbors = self._number_of_neighbors
    # n_jobs=-1: use all available cores for neighbor search.
    return sk_KNeighborsClassifier(number_of_neighbors, n_jobs=-1)
15 changes: 14 additions & 1 deletion src/safeds/ml/classical/classification/_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand Down Expand Up @@ -42,7 +44,7 @@ def fit(self, training_set: TaggedTable) -> LogisticRegression:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_LogisticRegression(n_jobs=-1)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = LogisticRegression()
Expand Down Expand Up @@ -89,3 +91,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # n_jobs=-1: parallelize fitting across all available cores.
    wrapped_classifier = sk_LogisticRegression(n_jobs=-1)
    return wrapped_classifier
19 changes: 16 additions & 3 deletions src/safeds/ml/classical/classification/_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -23,13 +25,13 @@ class RandomForest(Classifier):
Raises
------
ValueError
If the number of trees is less than or equal to 0.
If `number_of_trees` is less than or equal to 0.
"""

def __init__(self, number_of_trees: int = 100) -> None:
# Validation
if number_of_trees < 1:
raise ValueError("The number of trees has to be greater than 0.")
raise ValueError("The parameter 'number_of_trees' has to be greater than 0.")

# Hyperparameters
self._number_of_trees = number_of_trees
Expand Down Expand Up @@ -60,7 +62,7 @@ def fit(self, training_set: TaggedTable) -> RandomForest:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_RandomForestClassifier(self._number_of_trees, n_jobs=-1)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = RandomForest(self._number_of_trees)
Expand Down Expand Up @@ -107,3 +109,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    # n_jobs=-1: build the trees in parallel on all available cores.
    return sk_RandomForestClassifier(
        self._number_of_trees,
        n_jobs=-1,
    )
17 changes: 15 additions & 2 deletions src/safeds/ml/classical/classification/_support_vector_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._classifier import Classifier

if TYPE_CHECKING:
from sklearn.base import ClassifierMixin

from safeds.data.tabular.containers import Table, TaggedTable


Expand All @@ -34,7 +36,7 @@ def __init__(self, c: float = 1.0) -> None:
self._target_name: str | None = None

if c <= 0:
raise ValueError("The strength of regularization given by the c parameter must be strictly positive.")
raise ValueError("The parameter 'c' has to be strictly positive.")
self._c = c

def fit(self, training_set: TaggedTable) -> SupportVectorMachine:
Expand All @@ -58,7 +60,7 @@ def fit(self, training_set: TaggedTable) -> SupportVectorMachine:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_classifier = sk_SVC(C=self._c)
wrapped_classifier = self._get_sklearn_classifier()
fit(wrapped_classifier, training_set)

result = SupportVectorMachine(self._c)
Expand Down Expand Up @@ -105,3 +107,14 @@ def is_fitted(self) -> bool:
Whether the classifier is fitted.
"""
return self._wrapped_classifier is not None

def _get_sklearn_classifier(self) -> ClassifierMixin:
    """
    Return a new wrapped Classifier from sklearn.

    Returns
    -------
    wrapped_classifier: ClassifierMixin
        The sklearn Classifier.
    """
    regularization_strength = self._c
    return sk_SVC(C=regularization_strength)
Loading

0 comments on commit bb5a07e

Please sign in to comment.