feat: add fnn functionality (#529)

Closes #522 ### Summary of Changes Added model and layer classes to support feed-forward neural network (FNN) functionality. Added an into_dataloader() function to the TaggedTable class that creates a PyTorch DataLoader for any given tagged table, which can then be used to train an FNN. Tests for those modules are probably incomplete, as I wasn't really sure what to test for; I could use some feedback there. --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Lars Reimann <mail@larsreimann.com>
Safe-DS · Mar 20, 2024 · ce53153 · ce53153
1 parent ca23f0f
commit ce53153
Show file tree

Hide file tree

Showing 9 changed files with 792 additions and 1 deletion.
diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
@@ -13,9 +13,11 @@
 import openpyxl
 import pandas as pd
 import seaborn as sns
+import torch
 import xxhash
 from pandas import DataFrame
 from scipy import stats
+from torch.utils.data import DataLoader, Dataset
 
 from safeds.data.image.containers import Image
 from safeds.data.tabular.typing import ColumnType, Schema
@@ -2392,3 +2394,41 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):  #
         data_copy = self._data.reset_index(drop=True)
         data_copy.columns = self.column_names
         return data_copy.__dataframe__(nan_as_null, allow_copy)
+
+    def _into_dataloader(self, batch_size: int) -> DataLoader:
+        """
+        Build a DataLoader over the rows of this table, for use when training neural networks.
+
+        Each row is converted into a list of its values, the lists are stacked into a NumPy array, and the array
+        is wrapped in a dataset that the DataLoader batches. The original table is not modified.
+
+        Parameters
+        ----------
+        batch_size
+            The number of rows that should be loaded per batch.
+
+        Returns
+        -------
+        result :
+            The DataLoader.
+
+        """
+        # Iterating a row yields the keys that `get_value` accepts, so each inner list holds one row's values.
+        row_values = [[row.get_value(column_name) for column_name in row] for row in self.to_rows()]
+        dataset = _CustomDataset(np.array(row_values))
+        return DataLoader(dataset=dataset, batch_size=batch_size)
+
+
+class _CustomDataset(Dataset):
+    """
+    Dataset that serves the entries of a feature array as float32 tensors.
+
+    Items are indexed along the first axis of the array, so one item corresponds to one row of the input.
+
+    Parameters
+    ----------
+    features
+        The feature values. They must be convertible to float32.
+    """
+
+    def __init__(self, features: np.ndarray):
+        # `astype` returns a new float32 array by default, so the tensor does not share memory with `features`.
+        self.X = torch.from_numpy(features.astype(np.float32))
+        # Cache the number of items (size of the first axis) for `__len__`.
+        self.len = self.X.shape[0]
+
+    def __getitem__(self, item: int) -> torch.Tensor:
+        """Return the entry at position `item` along the first axis."""
+        return self.X[item]
+
+    def __len__(self) -> int:
+        """Return the number of items in the dataset."""
+        return self.len
diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py
@@ -3,7 +3,10 @@
 import sys
 from typing import TYPE_CHECKING
 
+import numpy as np
+import torch
 import xxhash
+from torch.utils.data import DataLoader, Dataset
 
 from safeds.data.tabular.containers import Column, Row, Table
 from safeds.exceptions import (
@@ -190,7 +193,9 @@ def __hash__(self) -> int:
         hash : int
             The hash value.
         """
-        return xxhash.xxh3_64(hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8)).intdigest()
+        return xxhash.xxh3_64(
+            hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8),
+        ).intdigest()
 
     def __sizeof__(self) -> int:
         """
@@ -871,3 +876,42 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg
             target_name=self.target.name,
             feature_names=self.features.column_names,
         )
+
+    def _into_dataloader(self, batch_size: int) -> DataLoader:
+        """
+        Build a DataLoader over the features and target of this table, for use when training neural networks.
+
+        Each feature row is converted into a list of its values and the lists are stacked into a NumPy array. That
+        array and the target values are wrapped in a dataset that the DataLoader batches. The original table is not
+        modified.
+
+        Parameters
+        ----------
+        batch_size
+            The number of rows that should be loaded per batch.
+
+        Returns
+        -------
+        result :
+            The DataLoader.
+
+        """
+        # Iterating a row yields the keys that `get_value` accepts, so each inner list holds one row's values.
+        feature_values = [[row.get_value(column_name) for column_name in row] for row in self.features.to_rows()]
+        dataset = _CustomDataset(np.array(feature_values), np.array(self.target))
+        return DataLoader(dataset=dataset, batch_size=batch_size)
+
+
+class _CustomDataset(Dataset):
+    """
+    Dataset that serves (features, target) pairs as float32 tensors.
+
+    Items are indexed along the first axis of both arrays, so item `i` pairs entry `i` of the features with
+    entry `i` of the target.
+
+    Parameters
+    ----------
+    features
+        The feature values. They must be convertible to float32.
+    target
+        The target values. They must be convertible to float32.
+    """
+
+    def __init__(self, features: np.ndarray, target: np.ndarray):
+        # `astype` returns new float32 arrays by default, so the tensors do not share memory with the inputs.
+        self.X = torch.from_numpy(features.astype(np.float32))
+        self.Y = torch.from_numpy(target.astype(np.float32))
+        # The number of items is taken from the features (size of their first axis).
+        self.len = self.X.shape[0]
+
+    def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
+        """Return the features at position `item` and the matching target with a trailing axis of size 1."""
+        # `unsqueeze(-1)` appends an axis of size 1 to the selected target entry.
+        return self.X[item], self.Y[item].unsqueeze(-1)
+
+    def __len__(self) -> int:
+        """Return the number of items in the dataset."""
+        return self.len
diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py
@@ -0,0 +1,10 @@
+"""Classes for neural networks (classification and regression)."""
+
+from ._fnn_layer import FNNLayer
+from ._model import ClassificationNeuralNetwork, RegressionNeuralNetwork
+
+# Names re-exported as the public API of this package.
+__all__ = [
+    "FNNLayer",
+    "ClassificationNeuralNetwork",
+    "RegressionNeuralNetwork",
+]
diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_fnn_layer.py
@@ -0,0 +1,67 @@
+from torch import Tensor, nn
+
+from safeds.exceptions import ClosedBound, OutOfBoundsError
+
+
+class _InternalLayer(nn.Module):
+    """
+    A linear layer followed by an activation function.
+
+    Parameters
+    ----------
+    input_size
+        The number of input features of the linear layer.
+    output_size
+        The number of output features of the linear layer.
+    activation_function
+        The name of the activation function: "sigmoid", "relu" or "softmax".
+
+    Raises
+    ------
+    ValueError
+        If the name of the activation function is not known.
+    """
+
+    def __init__(self, input_size: int, output_size: int, activation_function: str):
+        super().__init__()
+        self._layer = nn.Linear(input_size, output_size)
+        match activation_function:
+            case "sigmoid":
+                self._fn = nn.Sigmoid()
+            case "relu":
+                self._fn = nn.ReLU()
+            case "softmax":
+                # Pass the dimension explicitly: relying on the implicit choice is deprecated and emits a
+                # warning. The last axis matches the implicit choice for 1D and 2D inputs and normalizes over
+                # the outputs of the linear layer.
+                self._fn = nn.Softmax(dim=-1)
+            case _:
+                # An f-string keeps the message unchanged for strings and also works for other argument types.
+                raise ValueError(f"Unknown Activation Function: {activation_function}")
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply the linear layer to `x` and pass the result through the activation function."""
+        return self._fn(self._layer(x))
+
+
+class FNNLayer:
+    """
+    Description of one layer of a feed-forward neural network.
+
+    The object only stores the sizes of the layer. The underlying module is created on demand by
+    `_get_internal_layer`.
+    """
+
+    def __init__(self, output_size: int, input_size: int | None = None):
+        """
+        Create a FNN Layer.
+
+        Parameters
+        ----------
+        output_size
+            The number of neurons in this layer
+        input_size
+            The number of neurons in the previous layer. If omitted, it must be provided via `_set_input_size`
+            before the internal layer is created.
+
+        Raises
+        ------
+        OutOfBoundsError
+            If input_size < 1
+            If output_size < 1
+
+        """
+        # Always define the attribute, so a layer created without an input size fails with a clear error rather
+        # than an AttributeError when its internal layer is requested.
+        self._input_size: int | None = None
+        if input_size is not None:
+            self._set_input_size(input_size=input_size)
+        if output_size < 1:
+            raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1))
+        self._output_size = output_size
+
+    def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
+        """
+        Create the internal layer described by this object.
+
+        Parameters
+        ----------
+        activation_function
+            The name of the activation function that is passed on to the internal layer.
+
+        Returns
+        -------
+        result :
+            The internal layer.
+
+        Raises
+        ------
+        ValueError
+            If the input size of this layer has not been set.
+        """
+        if self._input_size is None:
+            raise ValueError("The input_size of this layer has not been set.")
+        return _InternalLayer(self._input_size, self._output_size, activation_function)
+
+    @property
+    def output_size(self) -> int:
+        """
+        Get the output_size of this layer.
+
+        Returns
+        -------
+        result :
+            The Number of Neurons in this layer.
+        """
+        return self._output_size
+
+    def _set_input_size(self, input_size: int) -> None:
+        """
+        Set the number of neurons in the previous layer.
+
+        Parameters
+        ----------
+        input_size
+            The number of neurons in the previous layer
+
+        Raises
+        ------
+        OutOfBoundsError
+            If input_size < 1
+        """
+        if input_size < 1:
+            raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1))
+        self._input_size = input_size