From d7c65d5b74e82525e39743d7e0ba313a111f3dc9 Mon Sep 17 00:00:00 2001 From: alex-senger Date: Fri, 28 Apr 2023 14:59:35 +0200 Subject: [PATCH 1/3] feat: add `learning_rate` parameter to `gradient_boosting_classification` and `gradient_boosting_regression` --- .../_gradient_boosting_classification.py | 23 +++++++++++++++---- .../_gradient_boosting_regression.py | 23 +++++++++++++++---- .../test_gradient_boosting_classification.py | 17 ++++++++++++++ .../test_gradient_boosting_regression.py | 17 ++++++++++++++ 4 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py create mode 100644 tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classification.py b/src/safeds/ml/classical/classification/_gradient_boosting_classification.py index 5c5e4a129..4e4a908dd 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classification.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classification.py @@ -13,12 +13,27 @@ class GradientBoosting(Classifier): - """Gradient boosting classification.""" + """Gradient boosting classification. - def __init__(self) -> None: + Parameters + ---------- + learning_rate : float + Learning rate shrinks the contribution of each tree by `learning_rate`. + + + Raises + ------ + ValueError + If `learning_rate` is non-positive. + """ + + def __init__(self, learning_rate: float = 0.1) -> None: self._wrapped_classifier: sk_GradientBoostingClassifier | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None + if learning_rate <= 0: + raise ValueError("learning_rate must be non-negative.") + self._learning_rate = learning_rate def fit(self, training_set: TaggedTable) -> GradientBoosting: """ @@ -41,10 +56,10 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting: LearningError If the training data contains invalid values or if the training failed. """ - wrapped_classifier = sk_GradientBoostingClassifier() + wrapped_classifier = sk_GradientBoostingClassifier(learning_rate=self._learning_rate) fit(wrapped_classifier, training_set) - result = GradientBoosting() + result = GradientBoosting(learning_rate=self._learning_rate) result._wrapped_classifier = wrapped_classifier result._feature_names = training_set.features.column_names result._target_name = training_set.target.name diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regression.py b/src/safeds/ml/classical/regression/_gradient_boosting_regression.py index d57fe8e16..466d5aebe 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regression.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regression.py @@ -13,12 +13,27 @@ class GradientBoosting(Regressor): - """Gradient boosting regression.""" + """Gradient boosting regression. - def __init__(self) -> None: + Parameters + ---------- + learning_rate : float + Learning rate shrinks the contribution of each tree by `learning_rate`. + + + Raises + ------ + ValueError + If `learning_rate` is non-positive. + """ + + def __init__(self, learning_rate: float = 0.1) -> None: self._wrapped_regressor: sk_GradientBoostingRegressor | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None + if learning_rate <= 0: + raise ValueError("learning_rate must be non-negative.") + self._learning_rate = learning_rate def fit(self, training_set: TaggedTable) -> GradientBoosting: """ @@ -41,10 +56,10 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting: LearningError If the training data contains invalid values or if the training failed. """ - wrapped_regressor = sk_GradientBoostingRegressor() + wrapped_regressor = sk_GradientBoostingRegressor(learning_rate=self._learning_rate) fit(wrapped_regressor, training_set) - result = GradientBoosting() + result = GradientBoosting(learning_rate=self._learning_rate) result._wrapped_regressor = wrapped_regressor result._feature_names = training_set.features.column_names result._target_name = training_set.target.name diff --git a/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py b/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py new file mode 100644 index 000000000..55961ba46 --- /dev/null +++ b/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py @@ -0,0 +1,17 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.ml.classical.classification import GradientBoosting + + +def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None: + with pytest.raises(ValueError, match="learning_rate must be non-negative."): + GradientBoosting(learning_rate=-1) + + +def test_should_pass_learning_rate_to_sklearn() -> None: + training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + tagged_table = training_set.tag_columns("col1") + + regressor = GradientBoosting(learning_rate=2).fit(tagged_table) + assert regressor._wrapped_classifier is not None + assert regressor._wrapped_classifier.learning_rate == regressor._learning_rate diff --git a/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py b/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py new file mode 100644 index 000000000..5391d47e4 --- /dev/null +++ b/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py @@ -0,0 +1,17 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.ml.classical.regression import GradientBoosting + + +def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None: + with pytest.raises(ValueError, match="learning_rate must be non-negative."): + GradientBoosting(learning_rate=-1) + + +def test_should_pass_learning_rate_to_sklearn() -> None: + training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + tagged_table = training_set.tag_columns("col1") + + regressor = GradientBoosting(learning_rate=2).fit(tagged_table) + assert regressor._wrapped_regressor is not None + assert regressor._wrapped_regressor.learning_rate == regressor._learning_rate From 9a4999a2f520c5c2decf32dd28c25b9dd21278f8 Mon Sep 17 00:00:00 2001 From: alex-senger Date: Fri, 28 Apr 2023 15:19:06 +0200 Subject: [PATCH 2/3] fix: change docstrings error message and tests --- .../classification/_gradient_boosting_classification.py | 5 ++--- .../ml/classical/regression/_gradient_boosting_regression.py | 5 ++--- .../classification/test_gradient_boosting_classification.py | 2 +- .../regression/test_gradient_boosting_regression.py | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classification.py b/src/safeds/ml/classical/classification/_gradient_boosting_classification.py index 4e4a908dd..0136e8f11 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classification.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classification.py @@ -18,8 +18,7 @@ class GradientBoosting(Classifier): Parameters ---------- learning_rate : float - Learning rate shrinks the contribution of each tree by `learning_rate`. - + The contribution of each tree shrinks by `learning_rate`. Raises ------ @@ -32,7 +31,7 @@ def __init__(self, learning_rate: float = 0.1) -> None: self._feature_names: list[str] | None = None self._target_name: str | None = None if learning_rate <= 0: - raise ValueError("learning_rate must be non-negative.") + raise ValueError("learning_rate must be positive.") self._learning_rate = learning_rate def fit(self, training_set: TaggedTable) -> GradientBoosting: diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regression.py b/src/safeds/ml/classical/regression/_gradient_boosting_regression.py index 466d5aebe..953cc66c0 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regression.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regression.py @@ -18,8 +18,7 @@ class GradientBoosting(Regressor): Parameters ---------- learning_rate : float - Learning rate shrinks the contribution of each tree by `learning_rate`. - + The contribution of each tree shrinks by `learning_rate`. Raises ------ @@ -32,7 +31,7 @@ def __init__(self, learning_rate: float = 0.1) -> None: self._feature_names: list[str] | None = None self._target_name: str | None = None if learning_rate <= 0: - raise ValueError("learning_rate must be non-negative.") + raise ValueError("learning_rate must be positive.") self._learning_rate = learning_rate def fit(self, training_set: TaggedTable) -> GradientBoosting: diff --git a/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py b/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py index 55961ba46..5c7da7da5 100644 --- a/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py +++ b/tests/safeds/ml/classical/classification/test_gradient_boosting_classification.py @@ -4,7 +4,7 @@ def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None: - with pytest.raises(ValueError, match="learning_rate must be non-negative."): + with pytest.raises(ValueError, match="learning_rate must be positive."): GradientBoosting(learning_rate=-1) diff --git a/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py b/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py index 5391d47e4..8c5a6e65f 100644 --- a/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py +++ b/tests/safeds/ml/classical/regression/test_gradient_boosting_regression.py @@ -4,7 +4,7 @@ def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None: - with pytest.raises(ValueError, match="learning_rate must be non-negative."): + with pytest.raises(ValueError, match="learning_rate must be positive."): GradientBoosting(learning_rate=-1) From 8778bb485e9f09c595fa88b6cdb61d69588dc184 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 28 Apr 2023 21:31:23 +0200 Subject: [PATCH 3/3] docs: improve docstring for `learning_rate` --- .../classification/_gradient_boosting_classification.py | 3 ++- .../ml/classical/regression/_gradient_boosting_regression.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/safeds/ml/classical/classification/_gradient_boosting_classification.py b/src/safeds/ml/classical/classification/_gradient_boosting_classification.py index 0136e8f11..f154baf67 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting_classification.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting_classification.py @@ -18,7 +18,8 @@ class GradientBoosting(Classifier): Parameters ---------- learning_rate : float - The contribution of each tree shrinks by `learning_rate`. + The larger the value, the more the model is influenced by each additional tree. If the learning rate is too + low, the model might underfit. If the learning rate is too high, the model might overfit. Raises ------ diff --git a/src/safeds/ml/classical/regression/_gradient_boosting_regression.py b/src/safeds/ml/classical/regression/_gradient_boosting_regression.py index 953cc66c0..071d59049 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting_regression.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting_regression.py @@ -18,7 +18,8 @@ class GradientBoosting(Regressor): Parameters ---------- learning_rate : float - The contribution of each tree shrinks by `learning_rate`. + The larger the value, the more the model is influenced by each additional tree. If the learning rate is too + low, the model might underfit. If the learning rate is too high, the model might overfit. Raises ------