refactor: extract column name and out of bounds checks (#758)

Closes #407
Closes #637

### Summary of Changes

* New internal function `_check_bounds` to check whether a value is in some interval or raise an `OutOfBoundsError`. Now, bounds only need to be specified once instead of twice (in the `if` condition and again when raising).
* New internal function `_check_columns_exist` to check whether a column with a given name exists or raise a `ColumnNotFoundError`. Now, we get consistent error messages with suggestions of similar column names.

---------

Co-authored-by: megalinter-bot <[email protected]>
Safe-DS · May 13, 2024 · 92622fb · 92622fb
1 parent e72339e
commit 92622fb
Show file tree

Hide file tree

Showing 96 changed files with 994 additions and 1,513 deletions.
diff --git a/benchmarks/metrics/classification.py b/benchmarks/metrics/classification.py
@@ -1,15 +1,13 @@
 from __future__ import annotations
 
 from timeit import timeit
-from typing import TYPE_CHECKING
 
 import polars as pl
 
 from benchmarks.table.utils import create_synthetic_table
 from safeds.data.tabular.containers import Table
 from safeds.ml.metrics import ClassificationMetrics
 
-
 REPETITIONS = 10
 
 
@@ -32,9 +30,7 @@ def _run_recall() -> None:
 if __name__ == "__main__":
     # Create a synthetic Table
     table = (
-        create_synthetic_table(10000, 2)
-        .rename_column("column_0", "predicted")
-        .rename_column("column_1", "expected")
+        create_synthetic_table(10000, 2).rename_column("column_0", "predicted").rename_column("column_1", "expected")
     )
 
     # Run the benchmarks

diff --git a/docs/tutorials/classification.ipynb b/docs/tutorials/classification.ipynb
@@ -23,7 +23,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.containers import Table\n",
     "\n",
@@ -33,7 +32,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -48,15 +48,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "train_table, testing_table = titanic.split_rows(0.6)\n",
     "\n",
     "test_table = testing_table.remove_columns([\"survived\"]).shuffle_rows()"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -73,15 +73,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.transformation import OneHotEncoder\n",
     "\n",
     "encoder = OneHotEncoder().fit(train_table, [\"sex\"])"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -95,11 +95,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": "transformed_table = encoder.transform(train_table)",
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -111,15 +111,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "extra_names = [\"id\", \"name\", \"ticket\", \"cabin\", \"port_embarked\", \"age\", \"fare\"]\n",
     "\n",
     "train_tabular_dataset = transformed_table.to_tabular_dataset(\"survived\", extra_names)"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -131,7 +131,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.ml.classical.classification import RandomForestClassifier\n",
     "\n",
@@ -140,7 +139,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -155,7 +155,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "encoder = OneHotEncoder().fit(test_table, [\"sex\"])\n",
     "transformed_test_table = encoder.transform(test_table)\n",
@@ -168,7 +167,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -182,7 +182,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "encoder = OneHotEncoder().fit(test_table, [\"sex\"])\n",
     "testing_table = encoder.transform(testing_table)\n",
@@ -192,7 +191,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   }
  ],
  "metadata": {

diff --git a/docs/tutorials/regression.ipynb b/docs/tutorials/regression.ipynb
@@ -23,7 +23,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.containers import Table\n",
     "\n",
@@ -33,7 +32,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -48,15 +48,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "train_table, testing_table = pricing.split_rows(0.60)\n",
     "\n",
     "test_table = testing_table.remove_columns([\"price\"]).shuffle_rows()"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -68,15 +68,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "extra_names = [\"id\"]\n",
     "\n",
     "train_tabular_dataset = train_table.to_tabular_dataset(\"price\", extra_names)\n"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -88,7 +88,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.ml.classical.regression import DecisionTreeRegressor\n",
     "\n",
@@ -97,7 +96,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -111,7 +111,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "prediction = fitted_model.predict(\n",
     "    test_table\n",
@@ -121,7 +120,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -135,24 +135,15 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "105595.6001735107"
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
     "test_tabular_dataset = testing_table.to_tabular_dataset(\"price\", extra_names)\n",
     "\n",
     "fitted_model.mean_absolute_error(test_tabular_dataset)\n"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   }
  ],
  "metadata": {

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -87,7 +87,7 @@ plugins:
   - search
   - mkdocs-jupyter:
       include: ["*.ipynb"]
-      execute: true
+      execute: false # TODO: Enable execution
       allow_errors: false
   - exclude:
       glob: