Skip to content

Commit

Permalink
feat: remove output conversions (#792)
Browse files Browse the repository at this point in the history
Partially closes #732

### Summary of Changes

Output conversions are the exact inversion of the input conversion, so
there is no need to specify them again. Now, a neural network only takes
an input conversion and a list of layers. This also gets rid of several
errors that could occur if input and output conversions did not fit
together.

In a later PR, the input conversion will also be removed, since they
mirror datasets.

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
  • Loading branch information
lars-reimann and megalinter-bot authored May 20, 2024
1 parent dd8394b commit 46f2f5d
Show file tree
Hide file tree
Showing 25 changed files with 576 additions and 876 deletions.
4 changes: 2 additions & 2 deletions src/safeds/data/labeled/containers/_time_series_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

class TimeSeriesDataset:
"""
A time series dataset maps feature and time columns to a target column. Not like the TabularDataset a TimeSeries needs to contain one target and one time column, but can have empty features.
A time series dataset maps feature and time columns to a target column.
Create a time series dataset from a mapping of column names to their values.
Unlike a TabularDataset, a TimeSeries needs to contain one target and one time column, but can have empty features.
Parameters
----------
Expand Down
6 changes: 3 additions & 3 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,6 @@ def get_column(self, name: str) -> Column:
"""
Get a column from the table.
**Note:** This operation must fully load the data into memory, which can be expensive.
Parameters
----------
name:
Expand Down Expand Up @@ -584,7 +582,9 @@ def get_column(self, name: str) -> Column:
+-----+
"""
_check_columns_exist(self, name)
return Column._from_polars_series(self._data_frame.get_column(name))
return Column._from_polars_series(
self._lazy_frame.select(name).collect().get_column(name),
)

def get_column_type(self, name: str) -> DataType:
"""
Expand Down
3 changes: 2 additions & 1 deletion src/safeds/exceptions/_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def __init__(self) -> None:
class InputSizeError(Exception):
"""Raised when the amount of features being passed to a network does not match with its input size."""

def __init__(self, data_size: int | ModelImageSize, input_layer_size: int | ModelImageSize) -> None:
def __init__(self, data_size: int | ModelImageSize, input_layer_size: int | ModelImageSize | None) -> None:
# TODO: remove input_layer_size type None again
super().__init__(
f"The data size being passed to the network({data_size}) does not match with its input size({input_layer_size}). Consider changing the data size of the model or reformatting the data.",
)
Expand Down
127 changes: 56 additions & 71 deletions src/safeds/ml/nn/_model.py

Large diffs are not rendered by default.

32 changes: 9 additions & 23 deletions src/safeds/ml/nn/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,29 @@

if TYPE_CHECKING:
from ._input_converter import InputConversion
from ._input_converter_image import InputConversionImage
from ._input_converter_image_to_column import InputConversionImageToColumn
from ._input_converter_image_to_image import InputConversionImageToImage
from ._input_converter_image_to_table import InputConversionImageToTable
from ._input_converter_table import InputConversionTable
from ._input_converter_time_series import InputConversionTimeSeries
from ._output_converter import OutputConversion
from ._output_converter_image import (
OutputConversionImageToColumn,
OutputConversionImageToImage,
OutputConversionImageToTable,
)
from ._output_converter_table import OutputConversionTable
from ._output_converter_time_series import OutputConversionTimeSeries

apipkg.initpkg(
__name__,
{
"InputConversion": "._input_converter:InputConversion",
"InputConversionImage": "._input_converter_image:InputConversionImage",
"InputConversionImageToColumn": "._input_converter_image_to_column:InputConversionImageToColumn",
"InputConversionImageToImage": "._input_converter_image_to_image:InputConversionImageToImage",
"InputConversionImageToTable": "._input_converter_image_to_table:InputConversionImageToTable",
"InputConversionTable": "._input_converter_table:InputConversionTable",
"InputConversionTimeSeries": "._input_converter_time_series:InputConversionTimeSeries",
"OutputConversion": "._output_converter:OutputConversion",
"OutputConversionImageToColumn": "._output_converter_image:OutputConversionImageToColumn",
"OutputConversionImageToImage": "._output_converter_image:OutputConversionImageToImage",
"OutputConversionImageToTable": "._output_converter_image:OutputConversionImageToTable",
"OutputConversionTable": "._output_converter_table:OutputConversionTable",
"OutputConversionTimeSeries": "._output_converter_time_series:OutputConversionTimeSeries",
},
)

__all__ = [
"InputConversion",
"InputConversionImage",
"InputConversionImageToColumn",
"InputConversionImageToImage",
"InputConversionImageToTable",
"InputConversionTable",
"InputConversionTimeSeries",
"OutputConversion",
"OutputConversionImageToColumn",
"OutputConversionImageToImage",
"OutputConversionImageToTable",
"OutputConversionTable",
"OutputConversionTimeSeries",
]
22 changes: 10 additions & 12 deletions src/safeds/ml/nn/converters/_input_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from safeds.data.tabular.containers import Table

if TYPE_CHECKING:
from torch import Tensor
from torch.utils.data import DataLoader

from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
Expand All @@ -22,30 +23,27 @@ class InputConversion(Generic[FT, PT], ABC):

@property
@abstractmethod
def _data_size(self) -> int | ModelImageSize:
pass # pragma: no cover
def _data_size(self) -> int | ModelImageSize: ...

@abstractmethod
def _data_conversion_fit(
self,
input_data: FT,
batch_size: int,
num_of_classes: int = 1,
) -> DataLoader | ImageDataset:
pass # pragma: no cover
) -> DataLoader | ImageDataset: ...

@abstractmethod
def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | _SingleSizeImageList:
pass # pragma: no cover
def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | _SingleSizeImageList: ...

@abstractmethod
def _is_fit_data_valid(self, input_data: FT) -> bool:
pass # pragma: no cover
def _data_conversion_output(self, input_data: PT, output_data: Tensor, **kwargs: Any) -> FT: ...

@abstractmethod
def _is_predict_data_valid(self, input_data: PT) -> bool:
pass # pragma: no cover
def _is_fit_data_valid(self, input_data: FT) -> bool: ...

@abstractmethod
def _get_output_configuration(self) -> dict[str, Any]:
pass # pragma: no cover
def _is_predict_data_valid(self, input_data: PT) -> bool: ...

@abstractmethod
def _get_output_configuration(self) -> dict[str, Any]: ...
114 changes: 43 additions & 71 deletions src/safeds/ml/nn/converters/_input_converter_image.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import sys
from abc import ABC
from typing import TYPE_CHECKING, Any

from safeds._utils import _structural_hash
Expand All @@ -16,25 +17,57 @@
from safeds.ml.nn.typing import ModelImageSize


class InputConversionImage(InputConversion[ImageDataset, ImageList]):
"""The input conversion for a neural network, defines the input parameters for the neural network."""
class _InputConversionImage(InputConversion[ImageDataset, ImageList], ABC):
"""
The input conversion for a neural network, defines the input parameters for the neural network.
Parameters
----------
image_size:
the size of the input images
"""

def __init__(self, image_size: ModelImageSize) -> None:
"""
Define the input parameters for the neural network in the input conversion.
Parameters
----------
image_size:
the size of the input images
"""
self._input_size = image_size
self._output_size: ModelImageSize | int | None = None
self._one_hot_encoder: OneHotEncoder | None = None
self._column_name: str | None = None
self._column_names: list[str] | None = None
self._output_type: type | None = None

def __hash__(self) -> int:
return _structural_hash(
self.__class__.__name__,
self._input_size,
self._output_size,
self._one_hot_encoder,
self._column_name,
self._column_names,
self._output_type,
)

def __eq__(self, other: object) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return (self is other) or (
self._input_size == other._input_size
and self._output_size == other._output_size
and self._one_hot_encoder == other._one_hot_encoder
and self._column_name == other._column_name
and self._column_names == other._column_names
and self._output_type == other._output_type
)

def __sizeof__(self) -> int:
return (
sys.getsizeof(self._input_size)
+ sys.getsizeof(self._output_size)
+ sys.getsizeof(self._one_hot_encoder)
+ sys.getsizeof(self._column_name)
+ sys.getsizeof(self._column_names)
+ sys.getsizeof(self._output_type)
)

@property
def _data_size(self) -> ModelImageSize:
return self._input_size
Expand Down Expand Up @@ -81,64 +114,3 @@ def _get_output_configuration(self) -> dict[str, Any]:
"column_name": self._column_name,
"one_hot_encoder": self._one_hot_encoder,
}

def __hash__(self) -> int:
"""
Return a deterministic hash value for this InputConversionImage.
Returns
-------
hash:
the hash value
"""
return _structural_hash(
self._input_size,
self._output_size,
self._one_hot_encoder,
self._column_name,
self._column_names,
self._output_type,
)

def __eq__(self, other: object) -> bool:
"""
Compare two InputConversionImage instances.
Parameters
----------
other:
The InputConversionImage instance to compare to.
Returns
-------
equals:
Whether the instances are the same.
"""
if not isinstance(other, InputConversionImage):
return NotImplemented
return (self is other) or (
self._input_size == other._input_size
and self._output_size == other._output_size
and self._one_hot_encoder == other._one_hot_encoder
and self._column_name == other._column_name
and self._column_names == other._column_names
and self._output_type == other._output_type
)

def __sizeof__(self) -> int:
"""
Return the complete size of this object.
Returns
-------
size:
Size of this object in bytes.
"""
return (
sys.getsizeof(self._input_size)
+ sys.getsizeof(self._output_size)
+ sys.getsizeof(self._one_hot_encoder)
+ sys.getsizeof(self._column_name)
+ sys.getsizeof(self._column_names)
+ sys.getsizeof(self._output_type)
)
55 changes: 55 additions & 0 deletions src/safeds/ml/nn/converters/_input_converter_image_to_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from safeds._config import _init_default_device
from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
from safeds.data.labeled.containers import ImageDataset
from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor
from safeds.data.tabular.containers import Column
from safeds.data.tabular.transformation import OneHotEncoder

from ._input_converter_image import _InputConversionImage

if TYPE_CHECKING:
from torch import Tensor

from safeds.data.image.containers import ImageList


class InputConversionImageToColumn(_InputConversionImage):
    """Input conversion for networks that map images to a single target column (e.g. classification)."""

    def _data_conversion_output(
        self,
        input_data: ImageList,
        output_data: Tensor,
        **kwargs: Any,
    ) -> ImageDataset[Column]:
        """
        Convert raw network output back into an `ImageDataset[Column]`.

        Parameters
        ----------
        input_data:
            The images the network was run on; must all share one size.
        output_data:
            Per-image integer tensor used as class indices into the one-hot
            columns (presumably the predicted class per image — TODO confirm).
        kwargs:
            Must contain `column_name` (`str`) and `one_hot_encoder`
            (`OneHotEncoder`), as produced by `_get_output_configuration`.

        Raises
        ------
        ValueError
            If the images have different sizes, or if `column_name` /
            `one_hot_encoder` are missing or of the wrong type.
        """
        import torch

        _init_default_device()

        if not isinstance(input_data, _SingleSizeImageList):
            raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004
        if "column_name" not in kwargs or not isinstance(kwargs.get("column_name"), str):
            raise ValueError(
                "The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.",
            )
        if "one_hot_encoder" not in kwargs or not isinstance(kwargs.get("one_hot_encoder"), OneHotEncoder):
            raise ValueError(
                "The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.",
            )
        one_hot_encoder: OneHotEncoder = kwargs["one_hot_encoder"]
        column_name: str = kwargs["column_name"]

        # One row per image, one column per one-hot-encoded category; scatter a
        # 1 into the column selected by output_data for each row.
        output = torch.zeros(len(input_data), len(one_hot_encoder._get_names_of_added_columns()))
        output[torch.arange(len(input_data)), output_data] = 1

        # Build the ImageDataset without calling __init__ (which would redo its
        # own setup/validation — NOTE(review): confirm against ImageDataset);
        # instead fill its private fields directly, identity-ordered and unshuffled.
        im_dataset: ImageDataset[Column] = ImageDataset[Column].__new__(ImageDataset)
        im_dataset._output = _ColumnAsTensor._from_tensor(output, column_name, one_hot_encoder)
        im_dataset._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data))))
        im_dataset._shuffle_after_epoch = False
        im_dataset._batch_size = 1
        im_dataset._next_batch_index = 0
        im_dataset._input_size = input_data.sizes[0]
        im_dataset._input = input_data
        return im_dataset
36 changes: 36 additions & 0 deletions src/safeds/ml/nn/converters/_input_converter_image_to_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from safeds._config import _init_default_device
from safeds.data.image.containers import ImageList
from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
from safeds.data.labeled.containers import ImageDataset

from ._input_converter_image import _InputConversionImage

if TYPE_CHECKING:
from torch import Tensor


class InputConversionImageToImage(_InputConversionImage):
    """Input conversion for networks that map images to images."""

    def _data_conversion_output(
        self,
        input_data: ImageList,
        output_data: Tensor,
        **_kwargs: Any,
    ) -> ImageDataset[ImageList]:
        """
        Convert raw network output back into an `ImageDataset[ImageList]`.

        Parameters
        ----------
        input_data:
            The images the network was run on; must all share one size.
        output_data:
            Float tensor of output images, scaled by 255 to 8-bit channel
            values (assumes values in [0, 1] — TODO confirm).

        Raises
        ------
        ValueError
            If the input images do not all share one size.
        """
        import torch

        _init_default_device()

        # Only single-size image lists can be paired with a batched output tensor.
        if not isinstance(input_data, _SingleSizeImageList):
            raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004

        byte_tensor = (output_data * 255).to(torch.uint8)
        image_indices = list(range(output_data.size(dim=0)))
        output_images = _SingleSizeImageList._create_from_tensor(byte_tensor, image_indices)
        return ImageDataset[ImageList](input_data, output_images)
Loading

0 comments on commit 46f2f5d

Please sign in to comment.