Skip to content

Commit

Permalink
refactor: extract column name and out of bounds checks (#758)
Browse files Browse the repository at this point in the history
Closes #407
Closes #637

### Summary of Changes

* New internal function `_check_bounds` to check whether a value is in
some interval or raise an `OutOfBoundsError`. Now, bounds only need to
be specified once instead of twice (in the `if` condition and again when raising the error).
* New internal function `_check_columns_exist` to check whether a column
with a given name exists or raise a `ColumnNotFoundError`. Now, we get
consistent error messages with suggestions of similar column names.

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lars-reimann and megalinter-bot authored May 13, 2024
1 parent e72339e commit 92622fb
Show file tree
Hide file tree
Showing 96 changed files with 994 additions and 1,513 deletions.
6 changes: 1 addition & 5 deletions benchmarks/metrics/classification.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from __future__ import annotations

from timeit import timeit
from typing import TYPE_CHECKING

import polars as pl

from benchmarks.table.utils import create_synthetic_table
from safeds.data.tabular.containers import Table
from safeds.ml.metrics import ClassificationMetrics


REPETITIONS = 10


Expand All @@ -32,9 +30,7 @@ def _run_recall() -> None:
if __name__ == "__main__":
# Create a synthetic Table
table = (
create_synthetic_table(10000, 2)
.rename_column("column_0", "predicted")
.rename_column("column_1", "expected")
create_synthetic_table(10000, 2).rename_column("column_0", "predicted").rename_column("column_1", "expected")
)

# Run the benchmarks
Expand Down
32 changes: 16 additions & 16 deletions docs/tutorials/classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.data.tabular.containers import Table\n",
"\n",
Expand All @@ -33,7 +32,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -48,15 +48,15 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"train_table, testing_table = titanic.split_rows(0.6)\n",
"\n",
"test_table = testing_table.remove_columns([\"survived\"]).shuffle_rows()"
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -73,15 +73,15 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.data.tabular.transformation import OneHotEncoder\n",
"\n",
"encoder = OneHotEncoder().fit(train_table, [\"sex\"])"
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -95,11 +95,11 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": "transformed_table = encoder.transform(train_table)",
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -111,15 +111,15 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"extra_names = [\"id\", \"name\", \"ticket\", \"cabin\", \"port_embarked\", \"age\", \"fare\"]\n",
"\n",
"train_tabular_dataset = transformed_table.to_tabular_dataset(\"survived\", extra_names)"
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -131,7 +131,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.ml.classical.classification import RandomForestClassifier\n",
"\n",
Expand All @@ -140,7 +139,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -155,7 +155,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"encoder = OneHotEncoder().fit(test_table, [\"sex\"])\n",
"transformed_test_table = encoder.transform(test_table)\n",
Expand All @@ -168,7 +167,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -182,7 +182,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"encoder = OneHotEncoder().fit(test_table, [\"sex\"])\n",
"testing_table = encoder.transform(testing_table)\n",
Expand All @@ -192,7 +191,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
}
],
"metadata": {
Expand Down
33 changes: 12 additions & 21 deletions docs/tutorials/regression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.data.tabular.containers import Table\n",
"\n",
Expand All @@ -33,7 +32,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -48,15 +48,15 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"train_table, testing_table = pricing.split_rows(0.60)\n",
"\n",
"test_table = testing_table.remove_columns([\"price\"]).shuffle_rows()"
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -68,15 +68,15 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"extra_names = [\"id\"]\n",
"\n",
"train_tabular_dataset = train_table.to_tabular_dataset(\"price\", extra_names)\n"
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -88,7 +88,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.ml.classical.regression import DecisionTreeRegressor\n",
"\n",
Expand All @@ -97,7 +96,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -111,7 +111,6 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"prediction = fitted_model.predict(\n",
" test_table\n",
Expand All @@ -121,7 +120,8 @@
],
"metadata": {
"collapsed": false
}
},
"outputs": []
},
{
"cell_type": "markdown",
Expand All @@ -135,24 +135,15 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [
{
"data": {
"text/plain": "105595.6001735107"
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_tabular_dataset = testing_table.to_tabular_dataset(\"price\", extra_names)\n",
"\n",
"fitted_model.mean_absolute_error(test_tabular_dataset)\n"
],
"metadata": {
"collapsed": false
}
},
"outputs": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ plugins:
- search
- mkdocs-jupyter:
include: ["*.ipynb"]
execute: true
execute: false # TODO: Enable execution
allow_errors: false
- exclude:
glob:
Expand Down
Loading

0 comments on commit 92622fb

Please sign in to comment.