From 20fc31395390a21ce052bea6dc295ed66ddef310 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 17 Apr 2024 16:32:47 +0200 Subject: [PATCH] perf: lazily import our modules and external libraries (#624) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary of Changes We depend on several large libraries that take a while to load. Previously, importing `Table` would already import almost all of them, leading to horrendous startup times: ``` (Measure-Command { python -c "from safeds.data.tabular.containers import Table" }).TotalSeconds ``` ➡️ 3.5068337 (seconds to import `Table` on main) Now, we lazily import our own modules in the `__init__.py` files, and we lazily import external libraries. The latter part is quite ugly, since each function must now contain its external imports at the start. There is no better solution, however, and the improvements are huge: ``` (Measure-Command { python -c "from safeds.data.tabular.containers import Table" }).TotalSeconds ``` ➡️ 0.1683219 (seconds to import `Table` in this branch) We still have to pay the cost of an import the first time a module is actually needed, but at least this no longer has to happen fully upfront. 
--------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- .github/linters/.ruff.toml | 4 + poetry.lock | 16 ++- pyproject.toml | 3 +- src/safeds/_config/__init__.py | 14 +- src/safeds/_config/_device.py | 10 +- src/safeds/_utils/__init__.py | 14 +- src/safeds/_utils/_hashing.py | 4 +- src/safeds/data/image/containers/__init__.py | 17 ++- .../image/containers/_empty_image_list.py | 7 +- src/safeds/data/image/containers/_image.py | 120 ++++++++++++++---- .../data/image/containers/_image_list.py | 18 +-- .../containers/_multi_size_image_list.py | 11 +- .../containers/_single_size_image_list.py | 64 +++++++++- .../data/tabular/containers/__init__.py | 26 +++- src/safeds/data/tabular/containers/_column.py | 24 ++-- src/safeds/data/tabular/containers/_row.py | 10 +- src/safeds/data/tabular/containers/_table.py | 100 +++++++++++---- .../data/tabular/containers/_tagged_table.py | 38 +++--- .../data/tabular/containers/_time_series.py | 43 +++++-- .../data/tabular/transformation/__init__.py | 33 ++++- .../tabular/transformation/_discretizer.py | 7 +- .../data/tabular/transformation/_imputer.py | 12 +- .../tabular/transformation/_label_encoder.py | 8 +- .../transformation/_one_hot_encoder.py | 8 +- .../tabular/transformation/_range_scaler.py | 7 +- .../transformation/_standard_scaler.py | 7 +- src/safeds/data/tabular/typing/__init__.py | 26 +++- .../data/tabular/typing/_column_type.py | 7 +- .../data/tabular/typing/_imputer_strategy.py | 8 +- src/safeds/exceptions/__init__.py | 112 +++++++++++----- src/safeds/exceptions/_generic.py | 6 +- .../ml/classical/classification/__init__.py | 35 +++-- .../ml/classical/classification/_ada_boost.py | 5 +- .../classical/classification/_classifier.py | 4 +- .../classification/_decision_tree.py | 5 +- .../classification/_gradient_boosting.py | 5 +- .../classification/_k_nearest_neighbors.py | 5 +- .../classification/_logistic_regression.py | 5 +- .../classification/_random_forest.py | 5 +- 
.../classification/_support_vector_machine.py | 5 +- .../ml/classical/regression/__init__.py | 47 +++++-- .../ml/classical/regression/_ada_boost.py | 5 +- src/safeds/ml/classical/regression/_arima.py | 11 +- .../ml/classical/regression/_decision_tree.py | 5 +- .../regression/_elastic_net_regression.py | 5 +- .../regression/_gradient_boosting.py | 5 +- .../regression/_k_nearest_neighbors.py | 5 +- .../classical/regression/_lasso_regression.py | 5 +- .../regression/_linear_regression.py | 5 +- .../ml/classical/regression/_random_forest.py | 5 +- .../ml/classical/regression/_regressor.py | 7 +- .../classical/regression/_ridge_regression.py | 5 +- .../regression/_support_vector_machine.py | 5 +- src/safeds/ml/hyperparameters/__init__.py | 14 +- src/safeds/ml/nn/__init__.py | 18 ++- src/safeds/ml/nn/_model.py | 86 ++++++++----- 56 files changed, 800 insertions(+), 291 deletions(-) diff --git a/.github/linters/.ruff.toml b/.github/linters/.ruff.toml index 9dc8ef022..a3224bc22 100644 --- a/.github/linters/.ruff.toml +++ b/.github/linters/.ruff.toml @@ -103,6 +103,10 @@ ignore = [ "D106", "D107", ] +"__init__.py" = [ + # runtime-import-in-type-checking-block: Does not work with apipkg. + "TCH004", +] [pydocstyle] convention = "numpy" diff --git a/poetry.lock b/poetry.lock index 32df8fe24..91f42f39c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "anyio" @@ -20,6 +20,17 @@ doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphin test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (>=0.23)"] +[[package]] +name = "apipkg" +version = "3.0.2" +description = "apipkg: namespace control and lazy-import mechanism" +optional = false +python-versions = ">=3.7" +files = [ + {file = "apipkg-3.0.2-py3-none-any.whl", hash = "sha256:a16984c39de280701f3f6406ed3af658f2a1965011fe7bb5be34fbb48423b411"}, + {file = "apipkg-3.0.2.tar.gz", hash = "sha256:c7aa61a4f82697fdaa667e70af1505acf1f7428b1c27b891d204ba7a8a3c5e0d"}, +] + [[package]] name = "appnope" version = "0.1.4" @@ -3026,7 +3037,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -4399,4 +4409,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11,<3.13" -content-hash = 
"234bdb751d737d5fe454908cc083833ae668716e9a990813c1c23265954a55b2" +content-hash = "9f1eec71ee5ede694500a47405011516b583565894d16d5984d5b768ccb4d27d" diff --git a/pyproject.toml b/pyproject.toml index 8137b79f7..aca9277c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.11,<3.13" +apipkg = "^3.0.2" ipython = "^8.8.0" levenshtein = ">=0.21.1,<0.26.0" matplotlib = "^3.6.3" @@ -23,10 +24,10 @@ pillow = ">=9.5,<11.0" scikit-image = ">=0.21,<0.23" scikit-learn = "^1.2.0" seaborn = "^0.13.0" +statsmodels = "^0.14.1" torch = {version = "^2.2.0", source = "torch_cuda121"} torchvision = {version = "^0.17.0", source = "torch_cuda121"} xxhash = "^3.4.1" -statsmodels = "^0.14.1" [tool.poetry.group.dev.dependencies] pytest = ">=7.2.1,<9.0.0" diff --git a/src/safeds/_config/__init__.py b/src/safeds/_config/__init__.py index cfe590409..09d286a38 100644 --- a/src/safeds/_config/__init__.py +++ b/src/safeds/_config/__init__.py @@ -1,6 +1,18 @@ """Configuration for Safe-DS.""" -from ._device import _get_device +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._device import _get_device + +apipkg.initpkg( + __name__, + { + "_get_device": "._device:_get_device", + }, +) __all__ = [ "_get_device", diff --git a/src/safeds/_config/_device.py b/src/safeds/_config/_device.py index c3fbb3f4b..10c6ca8e7 100644 --- a/src/safeds/_config/_device.py +++ b/src/safeds/_config/_device.py @@ -1,6 +1,12 @@ -import torch -from torch.types import Device +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from torch.types import Device def _get_device() -> Device: + import torch + return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") diff --git a/src/safeds/_utils/__init__.py b/src/safeds/_utils/__init__.py index 78dbcf575..a95e373a5 100644 --- a/src/safeds/_utils/__init__.py +++ b/src/safeds/_utils/__init__.py @@ -1,6 +1,18 @@ 
"""Utilities for Safe-DS.""" -from ._hashing import _structural_hash +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._hashing import _structural_hash + +apipkg.initpkg( + __name__, + { + "_structural_hash": "._hashing:_structural_hash", + }, +) __all__ = [ "_structural_hash", diff --git a/src/safeds/_utils/_hashing.py b/src/safeds/_utils/_hashing.py index fd336ebe1..3cebafd4a 100644 --- a/src/safeds/_utils/_hashing.py +++ b/src/safeds/_utils/_hashing.py @@ -3,8 +3,6 @@ import struct from typing import Any -import xxhash - def _structural_hash(*value: Any) -> int: """ @@ -20,6 +18,8 @@ def _structural_hash(*value: Any) -> int: hash Deterministic hash value """ + import xxhash + return xxhash.xxh3_64(_value_to_bytes(value)).intdigest() diff --git a/src/safeds/data/image/containers/__init__.py b/src/safeds/data/image/containers/__init__.py index 169a53d45..f0a8f344c 100644 --- a/src/safeds/data/image/containers/__init__.py +++ b/src/safeds/data/image/containers/__init__.py @@ -1,7 +1,20 @@ """Classes that can store image data.""" -from ._image import Image -from ._image_list import ImageList +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._image import Image + from ._image_list import ImageList + +apipkg.initpkg( + __name__, + { + "Image": "._image:Image", + "ImageList": "._image_list:ImageList", + }, +) __all__ = [ "Image", diff --git a/src/safeds/data/image/containers/_empty_image_list.py b/src/safeds/data/image/containers/_empty_image_list.py index 531fa0472..799ecd670 100644 --- a/src/safeds/data/image/containers/_empty_image_list.py +++ b/src/safeds/data/image/containers/_empty_image_list.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash +from safeds.data.image.containers._image_list import ImageList +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from 
safeds.data.image.utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, _check_adjust_brightness_errors_and_warnings, @@ -15,6 +17,7 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from safeds.exceptions import IndexOutOfBoundsError if TYPE_CHECKING: from pathlib import Path @@ -23,10 +26,6 @@ from safeds.data.image.containers import Image -from safeds.data.image.containers._image_list import ImageList -from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList -from safeds.exceptions import IndexOutOfBoundsError - class _EmptyImageList(ImageList): """ diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index 886afa70a..6c3296197 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -6,11 +6,6 @@ from pathlib import Path from typing import TYPE_CHECKING -import torch -from PIL.Image import open as pil_image_open -from torch import Tensor -from torchvision.transforms import InterpolationMode - from safeds._config import _get_device from safeds._utils import _structural_hash from safeds.data.image.utils._image_transformation_error_and_warning_checks import ( @@ -23,15 +18,11 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from safeds.exceptions import IllegalFormatError if TYPE_CHECKING: + from torch import Tensor from torch.types import Device -import torchvision -from torchvision.transforms.v2 import PILToTensor -from torchvision.transforms.v2 import functional as func2 -from torchvision.utils import save_image - -from safeds.exceptions import IllegalFormatError class Image: @@ -44,17 +35,24 @@ class Image: The image data as tensor. 
""" - _pil_to_tensor = PILToTensor() - _default_device = _get_device() - _FILTER_EDGES_KERNEL = ( - torch.tensor([[-1.0, -1.0, -1.0], [-1.0, 8.0, -1.0], [-1.0, -1.0, -1.0]]) - .unsqueeze(dim=0) - .unsqueeze(dim=0) - .to(_default_device) - ) + _filter_edges_kernel_cache: Tensor | None = None @staticmethod - def from_file(path: str | Path, device: Device = _default_device) -> Image: + def _filter_edges_kernel() -> Tensor: + import torch + + if Image._filter_edges_kernel_cache is None: + Image._filter_edges_kernel_cache = ( + torch.tensor([[-1.0, -1.0, -1.0], [-1.0, 8.0, -1.0], [-1.0, -1.0, -1.0]]) + .unsqueeze(dim=0) + .unsqueeze(dim=0) + .to(_get_device()) + ) + + return Image._filter_edges_kernel_cache + + @staticmethod + def from_file(path: str | Path, device: Device = None) -> Image: """ Create an image from a file. @@ -75,10 +73,16 @@ def from_file(path: str | Path, device: Device = _default_device) -> Image: FileNotFoundError If the file of the path cannot be found """ - return Image(image_tensor=Image._pil_to_tensor(pil_image_open(path)), device=device) + from PIL.Image import open as pil_image_open + from torchvision.transforms.functional import pil_to_tensor + + if device is None: + device = _get_device() + + return Image(image_tensor=pil_to_tensor(pil_image_open(path)), device=device) @staticmethod - def from_bytes(data: bytes, device: Device = _default_device) -> Image: + def from_bytes(data: bytes, device: Device = None) -> Image: """ Create an image from bytes. @@ -94,15 +98,25 @@ def from_bytes(data: bytes, device: Device = _default_device) -> Image: image: The image. 
""" + import torch + import torchvision + + if device is None: + device = _get_device() + with warnings.catch_warnings(): warnings.filterwarnings( "ignore", message="The given buffer is not writable, and PyTorch does not support non-writable tensors.", ) input_tensor = torch.frombuffer(data, dtype=torch.uint8) + return Image(image_tensor=torchvision.io.decode_image(input_tensor), device=device) - def __init__(self, image_tensor: Tensor, device: Device = _default_device) -> None: + def __init__(self, image_tensor: Tensor, device: Device = None) -> None: + if device is None: + device = _get_device() + self._image_tensor: Tensor = image_tensor.to(device) def __eq__(self, other: object) -> bool: @@ -119,6 +133,8 @@ def __eq__(self, other: object) -> bool: equals: Whether the two images contain equal pixel data. """ + import torch + if not isinstance(other, Image): return NotImplemented return ( @@ -159,6 +175,10 @@ def _repr_jpeg_(self) -> bytes | None: jpeg: The image as JPEG. """ + import torch + from torchvision.transforms.v2 import functional as func2 + from torchvision.utils import save_image + if self.channel == 4: return None buffer = io.BytesIO() @@ -178,6 +198,10 @@ def _repr_png_(self) -> bytes: png: The image as PNG. """ + import torch + from torchvision.transforms.v2 import functional as func2 + from torchvision.utils import save_image + buffer = io.BytesIO() if self.channel == 1: func2.to_pil_image(self._image_tensor, mode="L").save(buffer, format="png") @@ -262,6 +286,10 @@ def to_jpeg_file(self, path: str | Path) -> None: path: The path to the JPEG file. """ + import torch + from torchvision.transforms.v2 import functional as func2 + from torchvision.utils import save_image + if self.channel == 4: raise IllegalFormatError("png") Path(path).parent.mkdir(parents=True, exist_ok=True) @@ -279,6 +307,10 @@ def to_png_file(self, path: str | Path) -> None: path: The path to the PNG file. 
""" + import torch + from torchvision.transforms.v2 import functional as func2 + from torchvision.utils import save_image + Path(path).parent.mkdir(parents=True, exist_ok=True) if self.channel == 1: func2.to_pil_image(self._image_tensor, mode="L").save(path, format="png") @@ -310,6 +342,8 @@ def change_channel(self, channel: int) -> Image: ValueError if the given channel is not a valid channel option """ + import torch + if self.channel == channel: image_tensor = self._image_tensor elif self.channel == 1 and channel == 3: @@ -360,6 +394,9 @@ def resize(self, new_width: int, new_height: int) -> Image: OutOfBoundsError If new_width or new_height are below 1 """ + from torchvision.transforms import InterpolationMode + from torchvision.transforms.v2 import functional as func2 + _check_resize_errors(new_width, new_height) return Image( func2.resize(self._image_tensor, size=[new_height, new_width], interpolation=InterpolationMode.NEAREST), @@ -377,6 +414,9 @@ def convert_to_grayscale(self) -> Image: result: The grayscale image. """ + import torch + from torchvision.transforms.v2 import functional as func2 + if self.channel == 4: return Image( torch.cat( @@ -417,6 +457,8 @@ def crop(self, x: int, y: int, width: int, height: int) -> Image: OutOfBoundsError If x or y are below 0 or if width or height are below 1 """ + from torchvision.transforms.v2 import functional as func2 + _check_crop_errors_and_warnings(x, y, width, height, self.width, self.height, plural=False) return Image(func2.crop(self._image_tensor, y, x, height, width), device=self.device) @@ -431,6 +473,8 @@ def flip_vertically(self) -> Image: result: The flipped image. """ + from torchvision.transforms.v2 import functional as func2 + return Image(func2.vertical_flip(self._image_tensor), device=self.device) def flip_horizontally(self) -> Image: @@ -444,6 +488,8 @@ def flip_horizontally(self) -> Image: result: The flipped image. 
""" + from torchvision.transforms.v2 import functional as func2 + return Image(func2.horizontal_flip(self._image_tensor), device=self.device) def adjust_brightness(self, factor: float) -> Image: @@ -471,6 +517,9 @@ def adjust_brightness(self, factor: float) -> Image: OutOfBoundsError If factor is smaller than 0. """ + import torch + from torchvision.transforms.v2 import functional as func2 + _check_adjust_brightness_errors_and_warnings(factor, plural=False) if self.channel == 4: return Image( @@ -506,6 +555,8 @@ def add_noise(self, standard_deviation: float) -> Image: OutOfBoundsError If standard_deviation is smaller than 0. """ + import torch + _check_add_noise_errors(standard_deviation) return Image( self._image_tensor + torch.normal(0, standard_deviation, self._image_tensor.size()).to(self.device) * 255, @@ -536,6 +587,9 @@ def adjust_contrast(self, factor: float) -> Image: OutOfBoundsError If factor is smaller than 0. """ + import torch + from torchvision.transforms.v2 import functional as func2 + _check_adjust_contrast_errors_and_warnings(factor, plural=False) if self.channel == 4: return Image( @@ -602,6 +656,8 @@ def blur(self, radius: int) -> Image: OutOfBoundsError If radius is smaller than 0 or equal or greater than the smaller size of the image. """ + from torchvision.transforms.v2 import functional as func2 + _check_blur_errors_and_warnings(radius, min(self.width, self.height), plural=False) return Image(func2.gaussian_blur(self._image_tensor, [radius * 2 + 1, radius * 2 + 1]), device=self.device) @@ -629,6 +685,9 @@ def sharpen(self, factor: float) -> Image: OutOfBoundsError If factor is smaller than 0. """ + import torch + from torchvision.transforms.v2 import functional as func2 + _check_sharpen_errors_and_warnings(factor, plural=False) if self.channel == 4: return Image( @@ -654,6 +713,9 @@ def invert_colors(self) -> Image: result: The image with inverted colors. 
""" + import torch + from torchvision.transforms.v2 import functional as func2 + if self.channel == 4: return Image( torch.cat([func2.invert(self._image_tensor[0:3]), self._image_tensor[3].unsqueeze(dim=0)]), @@ -673,6 +735,8 @@ def rotate_right(self) -> Image: result: The image rotated 90 degrees clockwise. """ + from torchvision.transforms.v2 import functional as func2 + return Image(func2.rotate(self._image_tensor, -90, expand=True), device=self.device) def rotate_left(self) -> Image: @@ -686,6 +750,8 @@ def rotate_left(self) -> Image: result: The image rotated 90 degrees counter-clockwise. """ + from torchvision.transforms.v2 import functional as func2 + return Image(func2.rotate(self._image_tensor, 90, expand=True), device=self.device) def find_edges(self) -> Image: @@ -699,10 +765,12 @@ def find_edges(self) -> Image: result: The image with edges found. """ + import torch + kernel = ( - Image._FILTER_EDGES_KERNEL - if self.device.type == Image._default_device - else Image._FILTER_EDGES_KERNEL.to(self.device) + Image._filter_edges_kernel() + if self.device.type == _get_device() + else Image._filter_edges_kernel().to(self.device) ) edges_tensor = torch.clamp( torch.nn.functional.conv2d( diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index 9b1eef92d..e5a0f7509 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -7,17 +7,13 @@ from pathlib import Path from typing import TYPE_CHECKING -import torch -from PIL.Image import open as pil_image_open -from torch import Tensor -from torchvision.transforms.v2 import PILToTensor -from torchvision.utils import make_grid, save_image - from safeds.data.image.containers._image import Image if TYPE_CHECKING: from collections.abc import Sequence + from torch import Tensor + from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from 
safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -34,8 +30,6 @@ class ImageList(metaclass=ABCMeta): | [from_files][safeds.data.image.containers._image_list.ImageList.from_files] | Create an ImageList from a directory or a list of files. | """ - _pil_to_tensor = PILToTensor() - @staticmethod @abstractmethod def _create_image_list(images: list[Tensor], indices: list[int]) -> ImageList: @@ -107,6 +101,9 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: FileNotFoundError If the directory or one of the files of the path cannot be found """ + from PIL.Image import open as pil_image_open + from torchvision.transforms.functional import pil_to_tensor + from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -127,7 +124,7 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: if p.is_dir(): path_list += sorted([p / name for name in os.listdir(p)]) else: - image_tensors.append(ImageList._pil_to_tensor(pil_image_open(p))) + image_tensors.append(pil_to_tensor(pil_image_open(p))) if fixed_size and ( image_tensors[0].size(dim=2) != image_tensors[-1].size(dim=2) or image_tensors[0].size(dim=1) != image_tensors[-1].size(dim=1) @@ -231,6 +228,9 @@ def _repr_png_(self) -> bytes: png: the png representation of this image list """ + import torch + from torchvision.utils import make_grid, save_image + from safeds.data.image.containers._empty_image_list import _EmptyImageList if isinstance(self, _EmptyImageList): diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index 955086f1c..dffc1fc52 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -5,9 +5,6 @@ 
import sys from typing import TYPE_CHECKING -import torch -from torch import Tensor - from safeds._utils import _structural_hash from safeds.data.image.containers import Image, ImageList from safeds.data.image.utils._image_transformation_error_and_warning_checks import ( @@ -23,6 +20,8 @@ if TYPE_CHECKING: from pathlib import Path + from torch import Tensor + from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -376,6 +375,8 @@ def _remove_image_by_index_ignore_invalid(self, index: int | list[int]) -> Image return image_list def remove_images_with_size(self, width: int, height: int) -> ImageList: + import torch + _check_remove_images_with_size_errors(width, height) if (width, height) not in self._image_list_dict: return self @@ -445,6 +446,8 @@ def shuffle_images(self) -> ImageList: return image_list def resize(self, new_width: int, new_height: int) -> ImageList: + import torch + from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList image_list_tensors = [] @@ -466,6 +469,8 @@ def convert_to_grayscale(self) -> ImageList: return image_list def crop(self, x: int, y: int, width: int, height: int) -> ImageList: + import torch + from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList image_list_tensors = [] diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index 20ad8d856..c81359c0e 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -6,12 +6,6 @@ from pathlib import Path from typing import TYPE_CHECKING -import torch -from torch import Tensor -from torchvision.transforms import InterpolationMode -from torchvision.transforms.v2 import functional as func2 -from torchvision.utils import save_image - from safeds._utils import _structural_hash from safeds.data.image.containers._image import Image from 
safeds.data.image.containers._image_list import ImageList @@ -33,6 +27,8 @@ ) if TYPE_CHECKING: + from torch import Tensor + from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList @@ -51,12 +47,16 @@ class _SingleSizeImageList(ImageList): """ def __init__(self) -> None: + import torch + self._tensor: Tensor = torch.empty(0) self._tensor_positions_to_indices: list[int] = [] # list[tensor_position] = index self._indices_to_tensor_positions: dict[int, int] = {} # {index: tensor_position} @staticmethod def _create_image_list(images: list[Tensor], indices: list[int]) -> ImageList: + import torch + from safeds.data.image.containers._empty_image_list import _EmptyImageList if len(images) == 0: @@ -129,6 +129,8 @@ def _calc_new_indices_to_tensor_positions(self) -> dict[int, int]: return _indices_to_tensor_positions def __eq__(self, other: object) -> bool: + import torch + if not isinstance(other, ImageList): return NotImplemented if not isinstance(other, _SingleSizeImageList): @@ -205,6 +207,10 @@ def has_image(self, image: Image) -> bool: ) def to_jpeg_files(self, path: str | Path | list[str | Path]) -> None: + import torch + from torchvision.transforms.v2 import functional as func2 + from torchvision.utils import save_image + if self.channel == 4: raise IllegalFormatError("png") path_str: str | Path @@ -248,6 +254,10 @@ def to_jpeg_files(self, path: str | Path | list[str | Path]) -> None: ) def to_png_files(self, path: str | Path | list[str | Path]) -> None: + import torch + from torchvision.transforms.v2 import functional as func2 + from torchvision.utils import save_image + path_str: str | Path if isinstance(path, list): if len(path) == self.number_of_images: @@ -307,6 +317,8 @@ def change_channel(self, channel: int) -> ImageList: @staticmethod def _change_channel_of_tensor(tensor: Tensor, channel: int) -> Tensor: + import torch + """ Change the channel of a tensor to the given channel. 
@@ -343,6 +355,8 @@ def _change_channel_of_tensor(tensor: Tensor, channel: int) -> Tensor: raise ValueError(f"Channel {channel} is not a valid channel option. Use either 1, 3 or 4") def _add_image_tensor(self, image_tensor: Tensor, index: int) -> ImageList: + import torch + from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList if index in self._indices_to_tensor_positions: @@ -404,6 +418,8 @@ def _add_image_tensor(self, image_tensor: Tensor, index: int) -> ImageList: return image_list_multi def add_images(self, images: list[Image] | ImageList) -> ImageList: + import torch + from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList @@ -555,6 +571,9 @@ def shuffle_images(self) -> ImageList: return image_list def resize(self, new_width: int, new_height: int) -> ImageList: + from torchvision.transforms import InterpolationMode + from torchvision.transforms.v2 import functional as func2 + _check_resize_errors(new_width, new_height) image_list = self._clone_without_tensor() image_list._tensor = func2.resize( @@ -571,6 +590,9 @@ def convert_to_grayscale(self) -> ImageList: @staticmethod def _convert_tensor_to_grayscale(tensor: Tensor) -> Tensor: + import torch + from torchvision.transforms.v2 import functional as func2 + if tensor.size(dim=-3) == 4: return torch.cat( [func2.rgb_to_grayscale(tensor[:, 0:3], num_output_channels=3), tensor[:, 3].unsqueeze(dim=1)], @@ -580,22 +602,31 @@ def _convert_tensor_to_grayscale(tensor: Tensor) -> Tensor: return func2.rgb_to_grayscale(tensor[:, 0:3], num_output_channels=3) def crop(self, x: int, y: int, width: int, height: int) -> ImageList: + from torchvision.transforms.v2 import functional as func2 + _check_crop_errors_and_warnings(x, y, width, height, self.widths[0], self.heights[0], plural=True) image_list = self._clone_without_tensor() image_list._tensor = func2.crop(self._tensor, x, y, height, width) 
return image_list def flip_vertically(self) -> ImageList: + from torchvision.transforms.v2 import functional as func2 + image_list = self._clone_without_tensor() image_list._tensor = func2.vertical_flip(self._tensor) return image_list def flip_horizontally(self) -> ImageList: + from torchvision.transforms.v2 import functional as func2 + image_list = self._clone_without_tensor() image_list._tensor = func2.horizontal_flip(self._tensor) return image_list def adjust_brightness(self, factor: float) -> ImageList: + import torch + from torchvision.transforms.v2 import functional as func2 + _check_adjust_brightness_errors_and_warnings(factor, plural=True) image_list = self._clone_without_tensor() if self.channel == 4: @@ -608,12 +639,17 @@ def adjust_brightness(self, factor: float) -> ImageList: return image_list def add_noise(self, standard_deviation: float) -> ImageList: + import torch + _check_add_noise_errors(standard_deviation) image_list = self._clone_without_tensor() image_list._tensor = self._tensor + torch.normal(0, standard_deviation, self._tensor.size()) * 255 return image_list def adjust_contrast(self, factor: float) -> ImageList: + import torch + from torchvision.transforms.v2 import functional as func2 + _check_adjust_contrast_errors_and_warnings(factor, plural=True) image_list = self._clone_without_tensor() if self.channel == 4: @@ -634,12 +670,17 @@ def adjust_color_balance(self, factor: float) -> ImageList: return image_list def blur(self, radius: int) -> ImageList: + from torchvision.transforms.v2 import functional as func2 + _check_blur_errors_and_warnings(radius, min(self.widths[0], self.heights[0]), plural=True) image_list = self._clone_without_tensor() image_list._tensor = func2.gaussian_blur(self._tensor, [radius * 2 + 1, radius * 2 + 1]) return image_list def sharpen(self, factor: float) -> ImageList: + import torch + from torchvision.transforms.v2 import functional as func2 + _check_sharpen_errors_and_warnings(factor, plural=True) image_list = 
self._clone_without_tensor() if self.channel == 4: @@ -652,6 +693,9 @@ def sharpen(self, factor: float) -> ImageList: return image_list def invert_colors(self) -> ImageList: + import torch + from torchvision.transforms.v2 import functional as func2 + image_list = self._clone_without_tensor() if self.channel == 4: image_list._tensor = torch.cat( @@ -663,17 +707,23 @@ def invert_colors(self) -> ImageList: return image_list def rotate_right(self) -> ImageList: + from torchvision.transforms.v2 import functional as func2 + image_list = self._clone_without_tensor() image_list._tensor = func2.rotate(self._tensor, -90, expand=True) return image_list def rotate_left(self) -> ImageList: + from torchvision.transforms.v2 import functional as func2 + image_list = self._clone_without_tensor() image_list._tensor = func2.rotate(self._tensor, 90, expand=True) return image_list def find_edges(self) -> ImageList: - kernel = Image._FILTER_EDGES_KERNEL.to("cpu") + import torch + + kernel = Image._filter_edges_kernel().to("cpu") edges_tensor = torch.clamp( torch.nn.functional.conv2d( self.convert_to_grayscale()._as_single_size_image_list()._tensor.float()[:, 0].unsqueeze(dim=1), diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py index 6488edb2a..ffb561125 100644 --- a/src/safeds/data/tabular/containers/__init__.py +++ b/src/safeds/data/tabular/containers/__init__.py @@ -1,10 +1,26 @@ """Classes that can store tabular data.""" -from ._column import Column -from ._row import Row -from ._table import Table -from ._tagged_table import TaggedTable -from ._time_series import TimeSeries +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._column import Column + from ._row import Row + from ._table import Table + from ._tagged_table import TaggedTable + from ._time_series import TimeSeries + +apipkg.initpkg( + __name__, + { + "Column": "._column:Column", + "Row": "._row:Row", + "Table": "._table:Table", + 
"TaggedTable": "._tagged_table:TaggedTable", + "TimeSeries": "._time_series:TimeSeries", + }, +) __all__ = [ "Column", diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 897c7dcf4..bfaaf0a93 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -6,11 +6,6 @@ from numbers import Number from typing import TYPE_CHECKING, Any, TypeVar, overload -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns - from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.typing import ColumnType @@ -24,12 +19,12 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterator + import pandas as pd + + T = TypeVar("T") R = TypeVar("R") -# Enable copy-on-write for pandas dataframes -pd.options.mode.copy_on_write = True - class Column(Sequence[T]): """ @@ -103,6 +98,11 @@ def __init__(self, name: str, data: Sequence[T] | None = None) -> None: >>> from safeds.data.tabular.containers import Column >>> column = Column("test", [1, 2, 3]) """ + import pandas as pd + + # Enable copy-on-write for pandas dataframes + pd.options.mode.copy_on_write = True + if data is None: data = [] @@ -503,6 +503,8 @@ def has_missing_values(self) -> bool: >>> column2.has_missing_values() False """ + import numpy as np + return self.any(lambda value: value is None or (isinstance(value, Number) and np.isnan(value))) # ------------------------------------------------------------------------------------------------------------------ @@ -937,6 +939,9 @@ def plot_boxplot(self) -> Image: >>> column = Column("test", [1, 2, 3]) >>> boxplot = column.plot_boxplot() """ + import matplotlib.pyplot as plt + import seaborn as sns + if not self.type.is_numeric(): raise NonNumericColumnError(f"{self.name} is of type {self._type}.") @@ -968,6 +973,9 @@ def plot_histogram(self) -> Image: >>> column = 
Column("test", [1, 2, 3]) >>> histogram = column.plot_histogram() """ + import matplotlib.pyplot as plt + import seaborn as sns + fig = plt.figure() ax = sns.histplot(data=self._data) ax.set_xticks(ax.get_xticks()) diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index a1bafa2ca..a69bce02a 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -5,8 +5,6 @@ from collections.abc import Callable, Mapping from typing import TYPE_CHECKING, Any -import pandas as pd - from safeds._utils import _structural_hash from safeds.data.tabular.typing import ColumnType, Schema from safeds.exceptions import UnknownColumnNameError @@ -14,8 +12,7 @@ if TYPE_CHECKING: from collections.abc import Iterator -# Enable copy-on-write for pandas dataframes -pd.options.mode.copy_on_write = True + import pandas as pd class Row(Mapping[str, Any]): @@ -121,6 +118,11 @@ def __init__(self, data: Mapping[str, Any] | None = None) -> None: >>> from safeds.data.tabular.containers import Row >>> row = Row({"a": 1, "b": 2}) """ + import pandas as pd + + # Enable copy-on-write for pandas dataframes + pd.options.mode.copy_on_write = True + if data is None: data = {} diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index cb5b05e84..672cfaa2b 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -7,17 +7,6 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, TypeVar -import Levenshtein -import matplotlib.pyplot as plt -import numpy as np -import openpyxl -import pandas as pd -import seaborn as sns -import torch -from pandas import DataFrame -from scipy import stats -from torch.utils.data import DataLoader, Dataset - from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.typing import ColumnType, Schema @@ -37,14 +26,15 
@@ if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence + import numpy as np + import pandas as pd + from torch.utils.data import DataLoader, Dataset + from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer from ._tagged_table import TaggedTable from ._time_series import TimeSeries -# Enable copy-on-write for pandas dataframes -pd.options.mode.copy_on_write = True - # noinspection PyProtectedMember class Table: @@ -113,6 +103,8 @@ def from_csv_file(path: str | Path) -> Table: 0 1 2 1 1 0 0 7 """ + import pandas as pd + path = Path(path) if path.suffix != ".csv": raise WrongFileExtensionError(path, ".csv") @@ -158,6 +150,8 @@ def from_excel_file(path: str | Path) -> Table: 1 2 5 2 3 6 """ + import pandas as pd + path = Path(path) excel_extensions = [".xls", ".xlsx", ".xlsm", ".xlsb", ".odf", ".ods", ".odt"] if path.suffix not in excel_extensions: @@ -200,6 +194,8 @@ def from_json_file(path: str | Path) -> Table: 1 2 5 2 3 6 """ + import pandas as pd + path = Path(path) if path.suffix != ".json": raise WrongFileExtensionError(path, ".json") @@ -276,6 +272,9 @@ def from_columns(columns: list[Column]) -> Table: 1 2 5 2 3 6 """ + import pandas as pd + from pandas import DataFrame + dataframe: DataFrame = pd.DataFrame() column_names = [] @@ -321,6 +320,9 @@ def from_rows(rows: list[Row]) -> Table: 0 1 2 1 3 4 """ + import pandas as pd + from pandas import DataFrame + if len(rows) == 0: return Table._from_pandas_dataframe(pd.DataFrame()) @@ -366,6 +368,8 @@ def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) -> a b 0 1 2 """ + import pandas as pd + data = data.reset_index(drop=True) result = object.__new__(Table) @@ -408,6 +412,11 @@ def __init__(self, data: Mapping[str, Sequence[Any]] | None = None) -> None: 1 2 5 2 3 6 """ + import pandas as pd + + # Enable copy-on-write for pandas dataframes + pd.options.mode.copy_on_write = True + if data is None: data = {} @@ -739,6 +748,8 @@ def 
_get_similar_columns(self, column_name: str) -> list[str]: similar_columns: list[str] A list of all column names in the Table that are similar or equal to the given column name. """ + import Levenshtein + similar_columns = [] similarity = 0.6 i = 0 @@ -785,6 +796,8 @@ def summarize_statistics(self) -> Table: 8 idness 1.0 1.0 9 stability 0.5 0.5 """ + import pandas as pd + if self.number_of_columns == 0: return Table( { @@ -996,6 +1009,9 @@ def add_row(self, row: Row) -> Table: 0 1 2 1 3 4 """ + import numpy as np + import pandas as pd + int_columns = [] if self.number_of_columns == 0: @@ -1057,6 +1073,8 @@ def add_rows(self, rows: list[Row] | Table) -> Table: 1 3 4 2 5 6 """ + import pandas as pd + if isinstance(rows, Table): if rows.number_of_columns == 0: return self @@ -1124,6 +1142,8 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table: a b 0 1 2 """ + import pandas as pd + rows: list[Row] = [row for row in self.to_rows() if query(row)] if len(rows) == 0: result_table = Table._from_pandas_dataframe(pd.DataFrame(), self._schema) @@ -1394,6 +1414,9 @@ def remove_rows_with_outliers(self) -> Table: 9 0.0 0.00 0.0 -1000000 10 0.0 0.00 0.0 -1000000 """ + import numpy as np + from scipy import stats + table_without_nonnumericals = self.remove_columns_with_non_numerical_values() z_scores = np.absolute(stats.zscore(table_without_nonnumericals._data, nan_policy="omit")) filter_ = ((z_scores < 3) | np.isnan(z_scores)).all(axis=1) @@ -1931,6 +1954,9 @@ def plot_correlation_heatmap(self) -> Image: >>> table = Table.from_dict({"temperature": [10, 15, 20, 25, 30], "sales": [54, 74, 90, 206, 210]}) >>> image = table.plot_correlation_heatmap() """ + import matplotlib.pyplot as plt + import seaborn as sns + only_numerical = self.remove_columns_with_non_numerical_values() if self.number_of_rows == 0: @@ -2005,6 +2031,9 @@ def plot_lineplot(self, x_column_name: str, y_column_name: str) -> Image: >>> table = Table.from_dict({"temperature": [10, 15, 20, 25, 30], "sales": 
[54, 74, 90, 206, 210]}) >>> image = table.plot_lineplot("temperature", "sales") """ + import matplotlib.pyplot as plt + import seaborn as sns + if not self.has_column(x_column_name) or not self.has_column(y_column_name): similar_columns_x = self._get_similar_columns(x_column_name) similar_columns_y = self._get_similar_columns(y_column_name) @@ -2063,6 +2092,9 @@ def plot_scatterplot(self, x_column_name: str, y_column_name: str) -> Image: >>> table = Table.from_dict({"temperature": [10, 15, 20, 25, 30], "sales": [54, 74, 90, 206, 210]}) >>> image = table.plot_scatterplot("temperature", "sales") """ + import matplotlib.pyplot as plt + import seaborn as sns + if not self.has_column(x_column_name) or not self.has_column(y_column_name): similar_columns_x = self._get_similar_columns(x_column_name) similar_columns_y = self._get_similar_columns(y_column_name) @@ -2114,6 +2146,10 @@ def plot_boxplots(self) -> Image: >>> table = Table({"a":[1, 2], "b": [3, 42]}) >>> image = table.plot_boxplots() """ + import matplotlib.pyplot as plt + import pandas as pd + import seaborn as sns + numerical_table = self.remove_columns_with_non_numerical_values() if numerical_table.number_of_columns == 0: raise NonNumericColumnError("This table contains only non-numerical columns.") @@ -2156,6 +2192,10 @@ def plot_histograms(self) -> Image: >>> table = Table({"a": [2, 3, 5, 1], "b": [54, 74, 90, 2014]}) >>> image = table.plot_histograms() """ + import matplotlib.pyplot as plt + import pandas as pd + import seaborn as sns + col_wrap = min(self.number_of_columns, 3) data = pd.melt(self._data.map(lambda value: str(value)), value_vars=self.column_names) @@ -2235,6 +2275,8 @@ def to_excel_file(self, path: str | Path) -> None: >>> table = Table.from_dict({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> table.to_excel_file("./src/resources/to_excel_file.xlsx") """ + import openpyxl + path = Path(path) excel_extensions = [".xls", ".xlsx", ".xlsm", ".xlsb", ".odf", ".ods", ".odt"] if path.suffix not in 
excel_extensions: @@ -2358,6 +2400,8 @@ def to_rows(self) -> list[Row]: 'b': 30 })] """ + import pandas as pd + return [ Row._from_pandas_dataframe( pd.DataFrame([list(series_row)], columns=self._schema.column_names), @@ -2433,6 +2477,9 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: The DataLoader. """ + import numpy as np + from torch.utils.data import DataLoader + features = self.to_rows() all_rows = [] for row in features: @@ -2440,16 +2487,23 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: for column_name in row: new_item.append(row.get_value(column_name)) all_rows.append(new_item.copy()) - return DataLoader(dataset=_CustomDataset(np.array(all_rows)), batch_size=batch_size) + return DataLoader(dataset=_create_dataset(np.array(all_rows)), batch_size=batch_size) + + +def _create_dataset(features: np.array) -> Dataset: + import numpy as np + import torch + from torch.utils.data import Dataset + class _CustomDataset(Dataset): + def __init__(self, features: np.array): + self.X = torch.from_numpy(features.astype(np.float32)) + self.len = self.X.shape[0] -class _CustomDataset(Dataset): - def __init__(self, features: np.array): - self.X = torch.from_numpy(features.astype(np.float32)) - self.len = self.X.shape[0] + def __getitem__(self, item: int) -> torch.Tensor: + return self.X[item] - def __getitem__(self, item: int) -> torch.Tensor: - return self.X[item] + def __len__(self) -> int: + return self.len - def __len__(self) -> int: - return self.len + return _CustomDataset(features) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 1e55f6961..6ca27ab73 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,10 +3,6 @@ import sys from typing import TYPE_CHECKING -import torch -from torch import Tensor -from torch.utils.data import DataLoader, Dataset - from safeds._utils import _structural_hash 
from safeds.data.tabular.containers import Column, Row, Table from safeds.exceptions import ( @@ -19,6 +15,10 @@ from collections.abc import Callable, Mapping, Sequence from typing import Any + import numpy as np + from torch import Tensor + from torch.utils.data import DataLoader, Dataset + class TaggedTable(Table): """ @@ -893,9 +893,12 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> The DataLoader. """ + import torch + from torch.utils.data import DataLoader + if num_of_classes <= 2: return DataLoader( - dataset=_CustomDataset( + dataset=_create_dataset( torch.Tensor(self.features._data.values), torch.Tensor(self.target._data).unsqueeze(dim=-1), ), @@ -904,7 +907,7 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> ) else: return DataLoader( - dataset=_CustomDataset( + dataset=_create_dataset( torch.Tensor(self.features._data.values), torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes), ), @@ -913,15 +916,20 @@ def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> ) -class _CustomDataset(Dataset): +def _create_dataset(features: np.array, target: np.array) -> Dataset: + import torch + from torch.utils.data import Dataset + + class _CustomDataset(Dataset): + def __init__(self, features: Tensor, target: Tensor): + self.X = features.to(torch.float32) + self.Y = target.to(torch.float32) + self.len = self.X.size(dim=0) - def __init__(self, features: Tensor, target: Tensor): - self.X = features.to(torch.float32) - self.Y = target.to(torch.float32) - self.len = self.X.size(dim=0) + def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]: + return self.X[item], self.Y[item] - def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]: - return self.X[item], self.Y[item] + def __len__(self) -> int: + return self.len - def __len__(self) -> int: - return self.len + return _CustomDataset(features, target) diff --git 
a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index 64ae2bf0d..9be5b81e7 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -4,10 +4,6 @@ import sys from typing import TYPE_CHECKING -import matplotlib.pyplot as plt -import pandas as pd -import seaborn as sns - from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.containers import Column, Row, Table, TaggedTable @@ -172,6 +168,8 @@ def _from_table( >>> test_table = Table({"date": ["01.01", "01.02", "01.03", "01.04"], "f1": ["a", "b", "c", "a"], "t": [1,2,3,4]}) >>> timeseries = TimeSeries._from_table(test_table, "t", "date", ["f1"]) """ + import pandas as pd + table = table._as_table() if feature_names is not None and time_name in feature_names: raise ValueError(f"Column '{time_name}' can not be time and feature column.") @@ -242,9 +240,14 @@ def __init__( >>> from safeds.data.tabular.containers import TaggedTable >>> table = TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]) """ + import pandas as pd + + # Enable copy-on-write for pandas dataframes + pd.options.mode.copy_on_write = True + # Validate inputs super().__init__(data) - _data = Table(data) + _data: Table = Table(data) if feature_names is None: self._features = Table() self._feature_names = [] @@ -266,6 +269,8 @@ def __init__( if len(self.target._data) == 0: self.target._data = pd.Series(name=target_name) + self._data = _data._data + def __eq__(self, other: object) -> bool: """ Compare two time series instances. 
@@ -1011,11 +1016,13 @@ def plot_lagplot(self, lag: int) -> Image: Examples -------- - >>> from safeds.data.tabular.containers import TimeSeries - >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], ) - >>> image = table.plot_lagplot(lag = 1) - + >>> from safeds.data.tabular.containers import TimeSeries + >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], ) + >>> image = table.plot_lagplot(lag = 1) """ + import matplotlib.pyplot as plt + import pandas as pd + if not self._target.type.is_numeric(): raise NonNumericColumnError("This time series target contains non-numerical columns.") ax = pd.plotting.lag_plot(self._target._data, lag=lag) @@ -1056,11 +1063,13 @@ def plot_lineplot(self, x_column_name: str | None = None, y_column_name: str | N Examples -------- - >>> from safeds.data.tabular.containers import TimeSeries - >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], ) - >>> image = table.plot_lineplot() - + >>> from safeds.data.tabular.containers import TimeSeries + >>> table = TimeSeries({"time":[1, 2], "target": [3, 4], "feature":[2,2]}, target_name= "target", time_name="time", feature_names=["feature"], ) + >>> image = table.plot_lineplot() """ + import matplotlib.pyplot as plt + import seaborn as sns + self._data.index.name = "index" if x_column_name is not None and not self.get_column(x_column_name).type.is_numeric(): raise NonNumericColumnError("The time series plotted column contains non-numerical columns.") @@ -1137,6 +1146,9 @@ def plot_scatterplot( >>> image = table.plot_scatterplot() """ + import matplotlib.pyplot as plt + import seaborn as sns + self._data.index.name = "index" if x_column_name is not None and not self.get_column(x_column_name).type.is_numeric(): raise 
NonNumericColumnError("The time series plotted column contains non-numerical columns.") @@ -1245,8 +1257,11 @@ def plot_compare_time_series(self, time_series: list[TimeSeries]) -> Image: ------ NonNumericColumnError if the target column contains non numerical values - """ + import matplotlib.pyplot as plt + import pandas as pd + import seaborn as sns + if not self._target.type.is_numeric(): raise NonNumericColumnError("The time series plotted column contains non-numerical columns.") diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index 5f45314aa..3ae5fc572 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -1,12 +1,31 @@ """Classes for transforming tabular data.""" -from ._discretizer import Discretizer -from ._imputer import Imputer -from ._label_encoder import LabelEncoder -from ._one_hot_encoder import OneHotEncoder -from ._range_scaler import RangeScaler -from ._standard_scaler import StandardScaler -from ._table_transformer import InvertibleTableTransformer, TableTransformer +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._discretizer import Discretizer + from ._imputer import Imputer + from ._label_encoder import LabelEncoder + from ._one_hot_encoder import OneHotEncoder + from ._range_scaler import RangeScaler + from ._standard_scaler import StandardScaler + from ._table_transformer import InvertibleTableTransformer, TableTransformer + +apipkg.initpkg( + __name__, + { + "Discretizer": "._discretizer:Discretizer", + "Imputer": "._imputer:Imputer", + "InvertibleTableTransformer": "._table_transformer:InvertibleTableTransformer", + "LabelEncoder": "._label_encoder:LabelEncoder", + "OneHotEncoder": "._one_hot_encoder:OneHotEncoder", + "RangeScaler": "._range_scaler:RangeScaler", + "StandardScaler": "._standard_scaler:StandardScaler", + "TableTransformer": 
"._table_transformer:TableTransformer", + }, +) __all__ = [ "Discretizer", diff --git a/src/safeds/data/tabular/transformation/_discretizer.py b/src/safeds/data/tabular/transformation/_discretizer.py index c5ed6baca..3845549f2 100644 --- a/src/safeds/data/tabular/transformation/_discretizer.py +++ b/src/safeds/data/tabular/transformation/_discretizer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer +from typing import TYPE_CHECKING from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import TableTransformer @@ -12,6 +12,9 @@ UnknownColumnNameError, ) +if TYPE_CHECKING: + from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer + class Discretizer(TableTransformer): """ @@ -63,6 +66,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> Discretizer: UnknownColumnNameError If one of the columns, that should be fitted is not in the table. 
""" + from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer + if table.number_of_rows == 0: raise ValueError("The Discretizer cannot be fitted because the table contains 0 rows") diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py index 9abc79b62..17103e4de 100644 --- a/src/safeds/data/tabular/transformation/_imputer.py +++ b/src/safeds/data/tabular/transformation/_imputer.py @@ -2,16 +2,16 @@ import sys import warnings -from typing import Any - -import pandas as pd -from sklearn.impute import SimpleImputer as sk_SimpleImputer +from typing import TYPE_CHECKING, Any from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import TableTransformer from safeds.data.tabular.typing import ImputerStrategy from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError +if TYPE_CHECKING: + from sklearn.impute import SimpleImputer as sk_SimpleImputer + class Imputer(TableTransformer): """ @@ -160,6 +160,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> Imputer: NonNumericColumnError If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical data. """ + from sklearn.impute import SimpleImputer as sk_SimpleImputer + if column_names is None: column_names = table.column_names else: @@ -238,6 +240,8 @@ def transform(self, table: Table) -> Table: ValueError If the table contains 0 rows. 
""" + import pandas as pd + # Transformer has not been fitted yet if self._wrapped_transformer is None or self._column_names is None: raise TransformerNotFittedError diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 26c732327..31b32b72f 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -1,8 +1,7 @@ from __future__ import annotations import warnings - -from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder +from typing import TYPE_CHECKING from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import ( @@ -10,6 +9,9 @@ ) from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError +if TYPE_CHECKING: + from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder + # noinspection PyProtectedMember class LabelEncoder(InvertibleTableTransformer): @@ -44,6 +46,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: ValueError If the table contains 0 rows. 
""" + from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder + if column_names is None: column_names = table.column_names else: diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index cf557b548..503d0817a 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -4,8 +4,6 @@ from collections import Counter from typing import Any -import numpy as np - from safeds.data.tabular.containers import Column, Table from safeds.data.tabular.transformation._table_transformer import ( InvertibleTableTransformer, @@ -93,6 +91,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: ValueError If the table contains 0 rows. """ + import numpy as np + if column_names is None: column_names = table.column_names else: @@ -178,6 +178,8 @@ def transform(self, table: Table) -> Table: ValueNotPresentWhenFittedError If a column in the to-be-transformed table contains a new value that was not already present in the table the OneHotEncoder was fitted on. """ + import numpy as np + # Transformer has not been fitted yet if self._column_names is None or self._value_to_column is None or self._value_to_column_nans is None: raise TransformerNotFittedError @@ -264,6 +266,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: ValueError If the table contains 0 rows. 
""" + import numpy as np + # Transformer has not been fitted yet if self._column_names is None or self._value_to_column is None or self._value_to_column_nans is None: raise TransformerNotFittedError diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 066c25632..0260d537d 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,11 +1,14 @@ from __future__ import annotations -from sklearn.preprocessing import MinMaxScaler as sk_MinMaxScaler +from typing import TYPE_CHECKING from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import InvertibleTableTransformer from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError +if TYPE_CHECKING: + from sklearn.preprocessing import MinMaxScaler as sk_MinMaxScaler + class RangeScaler(InvertibleTableTransformer): """ @@ -59,6 +62,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: ValueError If the table contains 0 rows. 
""" + from sklearn.preprocessing import MinMaxScaler as sk_MinMaxScaler + if column_names is None: column_names = table.column_names else: diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 748209f9c..b74db5261 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -1,11 +1,14 @@ from __future__ import annotations -from sklearn.preprocessing import StandardScaler as sk_StandardScaler +from typing import TYPE_CHECKING from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import InvertibleTableTransformer from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError +if TYPE_CHECKING: + from sklearn.preprocessing import StandardScaler as sk_StandardScaler + class StandardScaler(InvertibleTableTransformer): """The StandardScaler transforms column values to a range by removing the mean and scaling to unit variance.""" @@ -41,6 +44,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: ValueError If the table contains 0 rows. 
""" + from sklearn.preprocessing import StandardScaler as sk_StandardScaler + if column_names is None: column_names = table.column_names else: diff --git a/src/safeds/data/tabular/typing/__init__.py b/src/safeds/data/tabular/typing/__init__.py index 8b9b4a849..5b6db59a2 100644 --- a/src/safeds/data/tabular/typing/__init__.py +++ b/src/safeds/data/tabular/typing/__init__.py @@ -1,8 +1,28 @@ """Types used to define the schema of a tabular dataset.""" -from ._column_type import Anything, Boolean, ColumnType, Integer, Nothing, RealNumber, String -from ._imputer_strategy import ImputerStrategy -from ._schema import Schema +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._column_type import Anything, Boolean, ColumnType, Integer, Nothing, RealNumber, String + from ._imputer_strategy import ImputerStrategy + from ._schema import Schema + +apipkg.initpkg( + __name__, + { + "Anything": "._column_type:Anything", + "Boolean": "._column_type:Boolean", + "ColumnType": "._column_type:ColumnType", + "ImputerStrategy": "._imputer_strategy:ImputerStrategy", + "Integer": "._column_type:Integer", + "Nothing": "._column_type:Nothing", + "RealNumber": "._column_type:RealNumber", + "Schema": "._schema:Schema", + "String": "._column_type:String", + }, +) __all__ = [ "Anything", diff --git a/src/safeds/data/tabular/typing/_column_type.py b/src/safeds/data/tabular/typing/_column_type.py index 2c4ca4f57..c738e1725 100644 --- a/src/safeds/data/tabular/typing/_column_type.py +++ b/src/safeds/data/tabular/typing/_column_type.py @@ -5,8 +5,6 @@ from types import NoneType from typing import TYPE_CHECKING, Any -import numpy as np - if TYPE_CHECKING: import pandas as pd @@ -47,11 +45,12 @@ def _data_type(data: pd.Series) -> ColumnType: NotImplementedError If the given data type is not supported. 
""" + import numpy as np def column_type_of_type(cell_type: Any) -> ColumnType: - if cell_type == int or cell_type == np.int64 or cell_type == np.int32: + if cell_type in (int, np.int64, np.int32): return Integer(is_nullable) - if cell_type == float or cell_type == np.float64 or cell_type == np.float32: + if cell_type in (float, np.float64, np.float32): return RealNumber(is_nullable) if cell_type == bool: return Boolean(is_nullable) diff --git a/src/safeds/data/tabular/typing/_imputer_strategy.py b/src/safeds/data/tabular/typing/_imputer_strategy.py index 2e2d2bd8f..ee8f0751f 100644 --- a/src/safeds/data/tabular/typing/_imputer_strategy.py +++ b/src/safeds/data/tabular/typing/_imputer_strategy.py @@ -1,9 +1,13 @@ -from abc import ABC, abstractmethod +from __future__ import annotations -from sklearn.impute import SimpleImputer as sk_SimpleImputer +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING from safeds._utils import _structural_hash +if TYPE_CHECKING: + from sklearn.impute import SimpleImputer as sk_SimpleImputer + class ImputerStrategy(ABC): """ diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 7ba77798f..96ee06c40 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -1,39 +1,83 @@ """Custom exceptions that can be raised by Safe-DS.""" -from safeds.exceptions._data import ( - ColumnIsTargetError, - ColumnIsTimeError, - ColumnLengthMismatchError, - ColumnSizeError, - DuplicateColumnNameError, - DuplicateIndexError, - IllegalFormatError, - IllegalSchemaModificationError, - IndexOutOfBoundsError, - MissingValuesColumnError, - NonNumericColumnError, - TransformerNotFittedError, - UnknownColumnNameError, - ValueNotPresentWhenFittedError, - WrongFileExtensionError, -) -from safeds.exceptions._generic import ( - Bound, - ClosedBound, - OpenBound, - OutOfBoundsError, -) -from safeds.exceptions._ml import ( - DatasetContainsTargetError, - DatasetMissesDataError, - 
DatasetMissesFeaturesError, - InputSizeError, - LearningError, - ModelNotFittedError, - NonTimeSeriesError, - PredictionError, - TestTrainDataMismatchError, - UntaggedTableError, +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from safeds.exceptions._data import ( + ColumnIsTargetError, + ColumnIsTimeError, + ColumnLengthMismatchError, + ColumnSizeError, + DuplicateColumnNameError, + DuplicateIndexError, + IllegalFormatError, + IllegalSchemaModificationError, + IndexOutOfBoundsError, + MissingValuesColumnError, + NonNumericColumnError, + TransformerNotFittedError, + UnknownColumnNameError, + ValueNotPresentWhenFittedError, + WrongFileExtensionError, + ) + from safeds.exceptions._generic import ( + Bound, + ClosedBound, + OpenBound, + OutOfBoundsError, + ) + from safeds.exceptions._ml import ( + DatasetContainsTargetError, + DatasetMissesDataError, + DatasetMissesFeaturesError, + InputSizeError, + LearningError, + ModelNotFittedError, + NonTimeSeriesError, + PredictionError, + TestTrainDataMismatchError, + UntaggedTableError, + ) + +apipkg.initpkg( + __name__, + { + # Generic exceptions + "OutOfBoundsError": "._generic:OutOfBoundsError", + # Data exceptions + "ColumnIsTargetError": "._data:ColumnIsTargetError", + "ColumnIsTimeError": "._data:ColumnIsTimeError", + "ColumnLengthMismatchError": "._data:ColumnLengthMismatchError", + "ColumnSizeError": "._data:ColumnSizeError", + "DuplicateColumnNameError": "._data:DuplicateColumnNameError", + "DuplicateIndexError": "._data:DuplicateIndexError", + "IllegalFormatError": "._data:IllegalFormatError", + "IllegalSchemaModificationError": "._data:IllegalSchemaModificationError", + "IndexOutOfBoundsError": "._data:IndexOutOfBoundsError", + "MissingValuesColumnError": "._data:MissingValuesColumnError", + "NonNumericColumnError": "._data:NonNumericColumnError", + "TransformerNotFittedError": "._data:TransformerNotFittedError", + "UnknownColumnNameError": "._data:UnknownColumnNameError", + 
"ValueNotPresentWhenFittedError": "._data:ValueNotPresentWhenFittedError", + "WrongFileExtensionError": "._data:WrongFileExtensionError", + # ML exceptions + "DatasetContainsTargetError": "._ml:DatasetContainsTargetError", + "DatasetMissesDataError": "._ml:DatasetMissesDataError", + "DatasetMissesFeaturesError": "._ml:DatasetMissesFeaturesError", + "InputSizeError": "._ml:InputSizeError", + "LearningError": "._ml:LearningError", + "ModelNotFittedError": "._ml:ModelNotFittedError", + "NonTimeSeriesError": "._ml:NonTimeSeriesError", + "PredictionError": "._ml:PredictionError", + "TestTrainDataMismatchError": "._ml:TestTrainDataMismatchError", + "UntaggedTableError": "._ml:UntaggedTableError", + # Other + "Bound": "._generic:Bound", + "ClosedBound": "._generic:ClosedBound", + "OpenBound": "._generic:OpenBound", + }, ) __all__ = [ diff --git a/src/safeds/exceptions/_generic.py b/src/safeds/exceptions/_generic.py index b874a4f30..f74168333 100644 --- a/src/safeds/exceptions/_generic.py +++ b/src/safeds/exceptions/_generic.py @@ -2,8 +2,6 @@ from abc import ABC, abstractmethod -from numpy import isinf, isnan - class OutOfBoundsError(ValueError): """ @@ -52,6 +50,8 @@ def __init__( * If actual does not lie outside the given interval. * If actual is not a real number. """ + from numpy import isinf, isnan + # Validate bound parameters: if lower_bound is None and upper_bound is None: raise ValueError("Illegal interval: Attempting to raise OutOfBoundsError, but no bounds given.") @@ -112,6 +112,8 @@ def __init__(self, value: float): ValueError If value is nan or if value is +/-inf and the Bound type does not allow for infinite Bounds. 
""" + from numpy import isnan + if isnan(value): raise ValueError("Bound must be a real number, not nan.") self._value = value diff --git a/src/safeds/ml/classical/classification/__init__.py b/src/safeds/ml/classical/classification/__init__.py index 5b333a83e..da036d11a 100644 --- a/src/safeds/ml/classical/classification/__init__.py +++ b/src/safeds/ml/classical/classification/__init__.py @@ -1,13 +1,32 @@ """Classes for classification tasks.""" -from ._ada_boost import AdaBoostClassifier -from ._classifier import Classifier -from ._decision_tree import DecisionTreeClassifier -from ._gradient_boosting import GradientBoostingClassifier -from ._k_nearest_neighbors import KNearestNeighborsClassifier -from ._logistic_regression import LogisticRegressionClassifier -from ._random_forest import RandomForestClassifier -from ._support_vector_machine import SupportVectorMachineClassifier +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._ada_boost import AdaBoostClassifier + from ._classifier import Classifier + from ._decision_tree import DecisionTreeClassifier + from ._gradient_boosting import GradientBoostingClassifier + from ._k_nearest_neighbors import KNearestNeighborsClassifier + from ._logistic_regression import LogisticRegressionClassifier + from ._random_forest import RandomForestClassifier + from ._support_vector_machine import SupportVectorMachineClassifier + +apipkg.initpkg( + __name__, + { + "AdaBoostClassifier": "._ada_boost:AdaBoostClassifier", + "Classifier": "._classifier:Classifier", + "DecisionTreeClassifier": "._decision_tree:DecisionTreeClassifier", + "GradientBoostingClassifier": "._gradient_boosting:GradientBoostingClassifier", + "KNearestNeighborsClassifier": "._k_nearest_neighbors:KNearestNeighborsClassifier", + "LogisticRegressionClassifier": "._logistic_regression:LogisticRegressionClassifier", + "RandomForestClassifier": "._random_forest:RandomForestClassifier", + "SupportVectorMachineClassifier": 
"._support_vector_machine:SupportVectorMachineClassifier", + }, +) __all__ = [ "AdaBoostClassifier", diff --git a/src/safeds/ml/classical/classification/_ada_boost.py b/src/safeds/ml/classical/classification/_ada_boost.py index 289b5ef31..11a3969e8 100644 --- a/src/safeds/ml/classical/classification/_ada_boost.py +++ b/src/safeds/ml/classical/classification/_ada_boost.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.ensemble import AdaBoostClassifier as sk_AdaBoostClassifier - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.ensemble import AdaBoostClassifier as sk_AdaBoostClassifier from safeds.data.tabular.containers import Table, TaggedTable @@ -205,6 +204,8 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: wrapped_classifier: ClassifierMixin The sklearn Classifier. """ + from sklearn.ensemble import AdaBoostClassifier as sk_AdaBoostClassifier + learner = self.learner._get_sklearn_classifier() if self.learner is not None else None return sk_AdaBoostClassifier( estimator=learner, diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 1407df787..9d761520f 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -3,8 +3,6 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -from sklearn.metrics import accuracy_score as sk_accuracy_score - from safeds._utils import _structural_hash from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import UntaggedTableError @@ -121,6 +119,8 @@ def accuracy(self, validation_or_test_set: TaggedTable) -> float: UntaggedTableError If the table is untagged. 
""" + from sklearn.metrics import accuracy_score as sk_accuracy_score + if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table): raise UntaggedTableError diff --git a/src/safeds/ml/classical/classification/_decision_tree.py b/src/safeds/ml/classical/classification/_decision_tree.py index 651a6101b..214dc83f4 100644 --- a/src/safeds/ml/classical/classification/_decision_tree.py +++ b/src/safeds/ml/classical/classification/_decision_tree.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.tree import DecisionTreeClassifier as sk_DecisionTreeClassifier - from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict @@ -11,6 +9,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.tree import DecisionTreeClassifier as sk_DecisionTreeClassifier from safeds.data.tabular.containers import Table, TaggedTable @@ -111,4 +110,6 @@ def is_fitted(self) -> bool: return self._wrapped_classifier is not None def _get_sklearn_classifier(self) -> ClassifierMixin: + from sklearn.tree import DecisionTreeClassifier as sk_DecisionTreeClassifier + return sk_DecisionTreeClassifier() diff --git a/src/safeds/ml/classical/classification/_gradient_boosting.py b/src/safeds/ml/classical/classification/_gradient_boosting.py index 83eae4703..ba42bfb9f 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.ensemble import GradientBoostingClassifier as sk_GradientBoostingClassifier - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.ensemble import GradientBoostingClassifier as sk_GradientBoostingClassifier from 
safeds.data.tabular.containers import Table, TaggedTable @@ -176,4 +175,6 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: wrapped_classifier: ClassifierMixin The sklearn Classifier. """ + from sklearn.ensemble import GradientBoostingClassifier as sk_GradientBoostingClassifier + return sk_GradientBoostingClassifier(n_estimators=self._number_of_trees, learning_rate=self._learning_rate) diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py index 3202c7615..eb882d51a 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.neighbors import KNeighborsClassifier as sk_KNeighborsClassifier - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, DatasetMissesDataError, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.neighbors import KNeighborsClassifier as sk_KNeighborsClassifier from safeds.data.tabular.containers import Table, TaggedTable @@ -168,4 +167,6 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: wrapped_classifier: ClassifierMixin The sklearn Classifier. 
""" + from sklearn.neighbors import KNeighborsClassifier as sk_KNeighborsClassifier + return sk_KNeighborsClassifier(self._number_of_neighbors, n_jobs=-1) diff --git a/src/safeds/ml/classical/classification/_logistic_regression.py b/src/safeds/ml/classical/classification/_logistic_regression.py index 913176cca..f0dffb783 100644 --- a/src/safeds/ml/classical/classification/_logistic_regression.py +++ b/src/safeds/ml/classical/classification/_logistic_regression.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.linear_model import LogisticRegression as sk_LogisticRegression - from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict @@ -11,6 +9,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.linear_model import LogisticRegression as sk_LogisticRegression from safeds.data.tabular.containers import Table, TaggedTable @@ -119,4 +118,6 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: wrapped_classifier: ClassifierMixin The sklearn Classifier. 
""" + from sklearn.linear_model import LogisticRegression as sk_LogisticRegression + return sk_LogisticRegression(n_jobs=-1) diff --git a/src/safeds/ml/classical/classification/_random_forest.py b/src/safeds/ml/classical/classification/_random_forest.py index f37e1cf39..133c056c8 100644 --- a/src/safeds/ml/classical/classification/_random_forest.py +++ b/src/safeds/ml/classical/classification/_random_forest.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.ensemble import RandomForestClassifier as sk_RandomForestClassifier - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.ensemble import RandomForestClassifier as sk_RandomForestClassifier from safeds.data.tabular.containers import Table, TaggedTable @@ -155,4 +154,6 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: wrapped_classifier: ClassifierMixin The sklearn Classifier. 
""" + from sklearn.ensemble import RandomForestClassifier as sk_RandomForestClassifier + return sk_RandomForestClassifier(self._number_of_trees, n_jobs=-1) diff --git a/src/safeds/ml/classical/classification/_support_vector_machine.py b/src/safeds/ml/classical/classification/_support_vector_machine.py index 087b2316c..0c83de328 100644 --- a/src/safeds/ml/classical/classification/_support_vector_machine.py +++ b/src/safeds/ml/classical/classification/_support_vector_machine.py @@ -4,8 +4,6 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -from sklearn.svm import SVC as sk_SVC # noqa: N811 - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -13,6 +11,7 @@ if TYPE_CHECKING: from sklearn.base import ClassifierMixin + from sklearn.svm import SVC as sk_SVC # noqa: N811 from safeds.data.tabular.containers import Table, TaggedTable @@ -326,4 +325,6 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: wrapped_classifier: ClassifierMixin The sklearn Classifier. 
""" + from sklearn.svm import SVC as sk_SVC # noqa: N811 + return sk_SVC(C=self._c) diff --git a/src/safeds/ml/classical/regression/__init__.py b/src/safeds/ml/classical/regression/__init__.py index 1bd71ac05..fc082d6aa 100644 --- a/src/safeds/ml/classical/regression/__init__.py +++ b/src/safeds/ml/classical/regression/__init__.py @@ -1,17 +1,40 @@ """Models for regression tasks.""" -from ._ada_boost import AdaBoostRegressor -from ._arima import ArimaModelRegressor -from ._decision_tree import DecisionTreeRegressor -from ._elastic_net_regression import ElasticNetRegressor -from ._gradient_boosting import GradientBoostingRegressor -from ._k_nearest_neighbors import KNearestNeighborsRegressor -from ._lasso_regression import LassoRegressor -from ._linear_regression import LinearRegressionRegressor -from ._random_forest import RandomForestRegressor -from ._regressor import Regressor -from ._ridge_regression import RidgeRegressor -from ._support_vector_machine import SupportVectorMachineRegressor +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._ada_boost import AdaBoostRegressor + from ._arima import ArimaModelRegressor + from ._decision_tree import DecisionTreeRegressor + from ._elastic_net_regression import ElasticNetRegressor + from ._gradient_boosting import GradientBoostingRegressor + from ._k_nearest_neighbors import KNearestNeighborsRegressor + from ._lasso_regression import LassoRegressor + from ._linear_regression import LinearRegressionRegressor + from ._random_forest import RandomForestRegressor + from ._regressor import Regressor + from ._ridge_regression import RidgeRegressor + from ._support_vector_machine import SupportVectorMachineRegressor + +apipkg.initpkg( + __name__, + { + "AdaBoostRegressor": "._ada_boost:AdaBoostRegressor", + "ArimaModelRegressor": "._arima:ArimaModelRegressor", + "DecisionTreeRegressor": "._decision_tree:DecisionTreeRegressor", + "ElasticNetRegressor": "._elastic_net_regression:ElasticNetRegressor", 
+ "GradientBoostingRegressor": "._gradient_boosting:GradientBoostingRegressor", + "KNearestNeighborsRegressor": "._k_nearest_neighbors:KNearestNeighborsRegressor", + "LassoRegressor": "._lasso_regression:LassoRegressor", + "LinearRegressionRegressor": "._linear_regression:LinearRegressionRegressor", + "RandomForestRegressor": "._random_forest:RandomForestRegressor", + "Regressor": "._regressor:Regressor", + "RidgeRegressor": "._ridge_regression:RidgeRegressor", + "SupportVectorMachineRegressor": "._support_vector_machine:SupportVectorMachineRegressor", + }, +) __all__ = [ "AdaBoostRegressor", diff --git a/src/safeds/ml/classical/regression/_ada_boost.py b/src/safeds/ml/classical/regression/_ada_boost.py index 676473f43..9c5f23e7a 100644 --- a/src/safeds/ml/classical/regression/_ada_boost.py +++ b/src/safeds/ml/classical/regression/_ada_boost.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.ensemble import AdaBoostRegressor as sk_AdaBoostRegressor - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.ensemble import AdaBoostRegressor as sk_AdaBoostRegressor from safeds.data.tabular.containers import Table, TaggedTable @@ -205,6 +204,8 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.ensemble import AdaBoostRegressor as sk_AdaBoostRegressor + learner = self._learner._get_sklearn_regressor() if self._learner is not None else None return sk_AdaBoostRegressor( estimator=learner, diff --git a/src/safeds/ml/classical/regression/_arima.py b/src/safeds/ml/classical/regression/_arima.py index 1ce412906..b70b4489d 100644 --- a/src/safeds/ml/classical/regression/_arima.py +++ b/src/safeds/ml/classical/regression/_arima.py @@ -2,9 +2,7 @@ import io import itertools - -import matplotlib.pyplot as plt -from statsmodels.tsa.arima.model import ARIMA +from typing import TYPE_CHECKING from safeds._utils import _structural_hash from safeds.data.image.containers import Image @@ -17,6 +15,9 @@ NonTimeSeriesError, ) +if TYPE_CHECKING: + from statsmodels.tsa.arima.model import ARIMA + class ArimaModelRegressor: """Auto Regressive Integrated Moving Average Model.""" @@ -67,6 +68,8 @@ def fit(self, time_series: TimeSeries) -> ArimaModelRegressor: DatasetMissesDataError If the training data contains no rows. """ + from statsmodels.tsa.arima.model import ARIMA + if not isinstance(time_series, TimeSeries) and isinstance(time_series, Table): raise NonTimeSeriesError if time_series.number_of_rows == 0: @@ -173,6 +176,8 @@ def plot_predictions(self, test_series: TimeSeries) -> Image: If predicting with the given dataset failed. 
""" + import matplotlib.pyplot as plt + if not self.is_fitted() or self._arima is None: raise ModelNotFittedError test_data = test_series.target._data.to_numpy() diff --git a/src/safeds/ml/classical/regression/_decision_tree.py b/src/safeds/ml/classical/regression/_decision_tree.py index 473906552..891639d3f 100644 --- a/src/safeds/ml/classical/regression/_decision_tree.py +++ b/src/safeds/ml/classical/regression/_decision_tree.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.tree import DecisionTreeRegressor as sk_DecisionTreeRegressor - from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict @@ -11,6 +9,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.tree import DecisionTreeRegressor as sk_DecisionTreeRegressor from safeds.data.tabular.containers import Table, TaggedTable @@ -119,4 +118,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.tree import DecisionTreeRegressor as sk_DecisionTreeRegressor + return sk_DecisionTreeRegressor() diff --git a/src/safeds/ml/classical/regression/_elastic_net_regression.py b/src/safeds/ml/classical/regression/_elastic_net_regression.py index 15f9b6db1..d5d2dc3e5 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regression.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regression.py @@ -4,8 +4,6 @@ from typing import TYPE_CHECKING from warnings import warn -from sklearn.linear_model import ElasticNet as sk_ElasticNet - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -14,6 +12,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.linear_model import ElasticNet as sk_ElasticNet from safeds.data.tabular.containers import Table, TaggedTable @@ -206,4 +205,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.linear_model import ElasticNet as sk_ElasticNet + return sk_ElasticNet(alpha=self._alpha, l1_ratio=self._lasso_ratio) diff --git a/src/safeds/ml/classical/regression/_gradient_boosting.py b/src/safeds/ml/classical/regression/_gradient_boosting.py index c3905936b..6437f6410 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.ensemble import GradientBoostingRegressor as sk_GradientBoostingRegressor - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.ensemble import GradientBoostingRegressor as sk_GradientBoostingRegressor from safeds.data.tabular.containers import Table, TaggedTable @@ -176,4 +175,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.ensemble import GradientBoostingRegressor as sk_GradientBoostingRegressor + return sk_GradientBoostingRegressor(n_estimators=self._number_of_trees, learning_rate=self._learning_rate) diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py index e4ee60ee1..84d145334 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.neighbors import KNeighborsRegressor as sk_KNeighborsRegressor - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, DatasetMissesDataError, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.neighbors import KNeighborsRegressor as sk_KNeighborsRegressor from safeds.data.tabular.containers import Table, TaggedTable @@ -169,4 +168,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.neighbors import KNeighborsRegressor as sk_KNeighborsRegressor + return sk_KNeighborsRegressor(self._number_of_neighbors, n_jobs=-1) diff --git a/src/safeds/ml/classical/regression/_lasso_regression.py b/src/safeds/ml/classical/regression/_lasso_regression.py index 1748b63cd..6e2ac4620 100644 --- a/src/safeds/ml/classical/regression/_lasso_regression.py +++ b/src/safeds/ml/classical/regression/_lasso_regression.py @@ -3,8 +3,6 @@ from typing import TYPE_CHECKING from warnings import warn -from sklearn.linear_model import Lasso as sk_Lasso - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -13,6 +11,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.linear_model import Lasso as sk_Lasso from safeds.data.tabular.containers import Table, TaggedTable @@ -160,4 +159,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. """ + from sklearn.linear_model import Lasso as sk_Lasso + return sk_Lasso(alpha=self._alpha) diff --git a/src/safeds/ml/classical/regression/_linear_regression.py b/src/safeds/ml/classical/regression/_linear_regression.py index e577d0db1..d88c1bdc0 100644 --- a/src/safeds/ml/classical/regression/_linear_regression.py +++ b/src/safeds/ml/classical/regression/_linear_regression.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.linear_model import LinearRegression as sk_LinearRegression - from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict @@ -11,6 +9,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.linear_model import LinearRegression as sk_LinearRegression from safeds.data.tabular.containers import Table, TaggedTable @@ -119,4 +118,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.linear_model import LinearRegression as sk_LinearRegression + return sk_LinearRegression(n_jobs=-1) diff --git a/src/safeds/ml/classical/regression/_random_forest.py b/src/safeds/ml/classical/regression/_random_forest.py index d8959a36b..c97db625e 100644 --- a/src/safeds/ml/classical/regression/_random_forest.py +++ b/src/safeds/ml/classical/regression/_random_forest.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from sklearn.ensemble import RandomForestRegressor as sk_RandomForestRegressor - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -12,6 +10,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.ensemble import RandomForestRegressor as sk_RandomForestRegressor from safeds.data.tabular.containers import Table, TaggedTable @@ -150,4 +149,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.ensemble import RandomForestRegressor as sk_RandomForestRegressor + return sk_RandomForestRegressor(self._number_of_trees, n_jobs=-1) diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index dc2e7756b..91c768f2c 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -3,9 +3,6 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error -from sklearn.metrics import mean_squared_error as sk_mean_squared_error - from safeds._utils import _structural_hash from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import ColumnLengthMismatchError, UntaggedTableError @@ -120,6 +117,8 @@ def mean_squared_error(self, validation_or_test_set: TaggedTable) -> float: UntaggedTableError If the table is untagged. """ + from sklearn.metrics import mean_squared_error as sk_mean_squared_error + if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table): raise UntaggedTableError expected = validation_or_test_set.target @@ -148,6 +147,8 @@ def mean_absolute_error(self, validation_or_test_set: TaggedTable) -> float: UntaggedTableError If the table is untagged. 
""" + from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error + if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table): raise UntaggedTableError expected = validation_or_test_set.target diff --git a/src/safeds/ml/classical/regression/_ridge_regression.py b/src/safeds/ml/classical/regression/_ridge_regression.py index a5ac44d03..1d6de0772 100644 --- a/src/safeds/ml/classical/regression/_ridge_regression.py +++ b/src/safeds/ml/classical/regression/_ridge_regression.py @@ -3,8 +3,6 @@ import warnings from typing import TYPE_CHECKING -from sklearn.linear_model import Ridge as sk_Ridge - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -13,6 +11,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.linear_model import Ridge as sk_Ridge from safeds.data.tabular.containers import Table, TaggedTable @@ -161,4 +160,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.linear_model import Ridge as sk_Ridge + return sk_Ridge(alpha=self._alpha) diff --git a/src/safeds/ml/classical/regression/_support_vector_machine.py b/src/safeds/ml/classical/regression/_support_vector_machine.py index 53fdac122..2366745cc 100644 --- a/src/safeds/ml/classical/regression/_support_vector_machine.py +++ b/src/safeds/ml/classical/regression/_support_vector_machine.py @@ -4,8 +4,6 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -from sklearn.svm import SVR as sk_SVR # noqa: N811 - from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -13,6 +11,7 @@ if TYPE_CHECKING: from sklearn.base import RegressorMixin + from sklearn.svm import SVR as sk_SVR # noqa: N811 from safeds.data.tabular.containers import Table, TaggedTable @@ -326,4 +325,6 @@ def _get_sklearn_regressor(self) -> RegressorMixin: wrapped_regressor: RegressorMixin The sklearn Regressor. 
""" + from sklearn.svm import SVR as sk_SVR # noqa: N811 + return sk_SVR(C=self._c) diff --git a/src/safeds/ml/hyperparameters/__init__.py b/src/safeds/ml/hyperparameters/__init__.py index 67291eba7..6d51bc6ac 100644 --- a/src/safeds/ml/hyperparameters/__init__.py +++ b/src/safeds/ml/hyperparameters/__init__.py @@ -1,5 +1,17 @@ """Tools to work with hyperparameters of ML models.""" -from ._choice import Choice +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._choice import Choice + +apipkg.initpkg( + __name__, + { + "Choice": "._choice:Choice", + }, +) __all__ = ["Choice"] diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index 9481e591e..6334e0d12 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -1,7 +1,21 @@ """Classes for classification tasks.""" -from ._forward_layer import ForwardLayer -from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._forward_layer import ForwardLayer + from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor + +apipkg.initpkg( + __name__, + { + "ForwardLayer": "._forward_layer:ForwardLayer", + "NeuralNetworkClassifier": "._model:NeuralNetworkClassifier", + "NeuralNetworkRegressor": "._model:NeuralNetworkRegressor", + }, +) __all__ = [ "ForwardLayer", diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index aa1690ab5..b94669096 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -1,9 +1,7 @@ -import copy -from collections.abc import Callable -from typing import Self +from __future__ import annotations -import torch -from torch import Tensor, nn +import copy +from typing import TYPE_CHECKING, Self from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import ( @@ -13,12 +11,18 @@ OutOfBoundsError, TestTrainDataMismatchError, ) -from safeds.ml.nn._layer import Layer + +if 
TYPE_CHECKING: + from collections.abc import Callable + + from torch import Tensor, nn + + from safeds.ml.nn._layer import Layer class NeuralNetworkRegressor: def __init__(self, layers: list[Layer]): - self._model = _InternalModel(layers, is_for_classification=False) + self._model = _create_internal_model(layers, is_for_classification=False) self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False @@ -66,6 +70,9 @@ def fit( trained_model : The trained Model """ + import torch + from torch import nn + if epoch_size < 1: raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: @@ -133,6 +140,8 @@ def predict(self, test_data: Table) -> TaggedTable: ModelNotFittedError If the model has not been fitted yet """ + import torch + if not self._is_fitted: raise ModelNotFittedError if not (sorted(test_data.column_names)).__eq__( @@ -162,7 +171,7 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier: def __init__(self, layers: list[Layer]): - self._model = _InternalModel(layers, is_for_classification=True) + self._model = _create_internal_model(layers, is_for_classification=True) self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False @@ -211,6 +220,9 @@ def fit( trained_model : The trained Model """ + import torch + from torch import nn + if epoch_size < 1: raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: @@ -281,6 +293,8 @@ def predict(self, test_data: Table) -> TaggedTable: ModelNotFittedError If the Model has not been fitted yet """ + import torch + if not self._is_fitted: raise ModelNotFittedError if not (sorted(test_data.column_names)).__eq__( @@ -315,32 +329,38 @@ def is_fitted(self) -> bool: return self._is_fitted -class _InternalModel(nn.Module): - def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: - super().__init__() - self._layer_list = layers - internal_layers = 
[] - previous_output_size = None +def _create_internal_model(layers: list[Layer], is_for_classification: bool) -> nn.Module: + from torch import nn - for layer in layers: - if previous_output_size is not None: - layer._set_input_size(previous_output_size) - internal_layers.append(layer._get_internal_layer(activation_function="relu")) - previous_output_size = layer.output_size + class _InternalModel(nn.Module): + def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: - if is_for_classification: - internal_layers.pop() - if layers[-1].output_size > 2: - internal_layers.append(layers[-1]._get_internal_layer(activation_function="softmax")) - else: - internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) - self._pytorch_layers = nn.Sequential(*internal_layers) + super().__init__() + self._layer_list = layers + internal_layers = [] + previous_output_size = None - @property - def input_size(self) -> int: - return self._layer_list[0].input_size + for layer in layers: + if previous_output_size is not None: + layer._set_input_size(previous_output_size) + internal_layers.append(layer._get_internal_layer(activation_function="relu")) + previous_output_size = layer.output_size + + if is_for_classification: + internal_layers.pop() + if layers[-1].output_size > 2: + internal_layers.append(layers[-1]._get_internal_layer(activation_function="softmax")) + else: + internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) + self._pytorch_layers = nn.Sequential(*internal_layers) + + @property + def input_size(self) -> int: + return self._layer_list[0].input_size + + def forward(self, x: Tensor) -> Tensor: + for layer in self._pytorch_layers: + x = layer(x) + return x - def forward(self, x: Tensor) -> Tensor: - for layer in self._pytorch_layers: - x = layer(x) - return x + return _InternalModel(layers, is_for_classification)