Skip to content

Commit

Permalink
feat: add fnn functionality (#529)
Browse files Browse the repository at this point in the history
Closes #522 

### Summary of Changes

Added model and layer classes to support feed-forward neural network (FNN)
functionality.
Added an `_into_dataloader()` method to the `Table` and `TaggedTable` classes that creates a
PyTorch DataLoader for any given table, which can then be used to
train an FNN.
Tests for those modules are probably incomplete, as I wasn't really sure
what to test for; I could use some feedback there.

---------

Co-authored-by: megalinter-bot <[email protected]>
Co-authored-by: Lars Reimann <[email protected]>
  • Loading branch information
3 people authored Mar 20, 2024
1 parent ca23f0f commit ce53153
Show file tree
Hide file tree
Showing 9 changed files with 792 additions and 1 deletion.
40 changes: 40 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
import openpyxl
import pandas as pd
import seaborn as sns
import torch
import xxhash
from pandas import DataFrame
from scipy import stats
from torch.utils.data import DataLoader, Dataset

from safeds.data.image.containers import Image
from safeds.data.tabular.typing import ColumnType, Schema
Expand Down Expand Up @@ -2392,3 +2394,41 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): #
data_copy = self._data.reset_index(drop=True)
data_copy.columns = self.column_names
return data_copy.__dataframe__(nan_as_null, allow_copy)

def _into_dataloader(self, batch_size: int) -> DataLoader:
    """
    Build a PyTorch DataLoader over the rows of this table, for use when training neural networks.

    The original table is not modified.

    Parameters
    ----------
    batch_size
        The size of data batches that should be loaded at one time.

    Returns
    -------
    result :
        The DataLoader.
    """
    # One inner list per row; iterating a row yields its column names, which are
    # then resolved to the corresponding values.
    row_values = [[row.get_value(name) for name in row] for row in self.to_rows()]
    dataset = _CustomDataset(np.array(row_values))
    return DataLoader(dataset=dataset, batch_size=batch_size)


class _CustomDataset(Dataset):
def __init__(self, features: np.array):
self.X = torch.from_numpy(features.astype(np.float32))
self.len = self.X.shape[0]

def __getitem__(self, item: int) -> torch.Tensor:
return self.X[item]

def __len__(self) -> int:
return self.len
46 changes: 45 additions & 1 deletion src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import sys
from typing import TYPE_CHECKING

import numpy as np
import torch
import xxhash
from torch.utils.data import DataLoader, Dataset

from safeds.data.tabular.containers import Column, Row, Table
from safeds.exceptions import (
Expand Down Expand Up @@ -190,7 +193,9 @@ def __hash__(self) -> int:
hash : int
The hash value.
"""
return xxhash.xxh3_64(hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8)).intdigest()
return xxhash.xxh3_64(
hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8),
).intdigest()

def __sizeof__(self) -> int:
"""
Expand Down Expand Up @@ -871,3 +876,42 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg
target_name=self.target.name,
feature_names=self.features.column_names,
)

def _into_dataloader(self, batch_size: int) -> DataLoader:
    """
    Build a PyTorch DataLoader over the features and target of this table, for use when training neural networks.

    The original table is not modified.

    Parameters
    ----------
    batch_size
        The size of data batches that should be loaded at one time.

    Returns
    -------
    result :
        The DataLoader.
    """
    # One inner list per feature row; iterating a row yields its column names,
    # which are then resolved to the corresponding values.
    feature_values = [[row.get_value(name) for name in row] for row in self.features.to_rows()]
    dataset = _CustomDataset(np.array(feature_values), np.array(self.target))
    return DataLoader(dataset=dataset, batch_size=batch_size)


class _CustomDataset(Dataset):
def __init__(self, features: np.array, target: np.array):
self.X = torch.from_numpy(features.astype(np.float32))
self.Y = torch.from_numpy(target.astype(np.float32))
self.len = self.X.shape[0]

def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
return self.X[item], self.Y[item].unsqueeze(-1)

def __len__(self) -> int:
return self.len
10 changes: 10 additions & 0 deletions src/safeds/ml/nn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Neural network layers and models for classification and regression tasks."""

from ._fnn_layer import FNNLayer
from ._model import ClassificationNeuralNetwork, RegressionNeuralNetwork

# Names re-exported as the public API of this package.
__all__ = [
    "FNNLayer",
    "ClassificationNeuralNetwork",
    "RegressionNeuralNetwork",
]
67 changes: 67 additions & 0 deletions src/safeds/ml/nn/_fnn_layer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import torch
from torch import nn

from safeds.exceptions import ClosedBound, OutOfBoundsError


class _InternalLayer(nn.Module):
def __init__(self, input_size: int, output_size: int, activation_function: str):
super().__init__()
self._layer = nn.Linear(input_size, output_size)
match activation_function:
case "sigmoid":
self._fn = nn.Sigmoid()
case "relu":
self._fn = nn.ReLU()
case "softmax":
self._fn = nn.Softmax()
case _:
raise ValueError("Unknown Activation Function: " + activation_function)

def forward(self, x: float) -> float:
return self._fn(self._layer(x))


class FNNLayer:
def __init__(self, output_size: int, input_size: int | None = None):
"""
Create a FNN Layer.
Parameters
----------
input_size
The number of neurons in the previous layer
output_size
The number of neurons in this layer
Raises
------
ValueError
If input_size < 1
If output_size < 1
"""
if input_size is not None:
self._set_input_size(input_size=input_size)
if output_size < 1:
raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1))
self._output_size = output_size

def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
return _InternalLayer(self._input_size, self._output_size, activation_function)

@property
def output_size(self) -> int:
"""
Get the output_size of this layer.
Returns
-------
result :
The Number of Neurons in this layer.
"""
return self._output_size

def _set_input_size(self, input_size: int) -> None:
if input_size < 1:
raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1))
self._input_size = input_size
Loading

0 comments on commit ce53153

Please sign in to comment.