Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove output conversions #792

Merged
merged 5 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/safeds/data/labeled/containers/_time_series_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

class TimeSeriesDataset:
"""
A time series dataset maps feature and time columns to a target column. Unlike the TabularDataset, a TimeSeries needs to contain one target and one time column, but can have empty features.
A time series dataset maps feature and time columns to a target column.

Create a time series dataset from a mapping of column names to their values.
Unlike a TabularDataset, a TimeSeries needs to contain one target and one time column, but can have empty features.

Parameters
----------
Expand Down
6 changes: 3 additions & 3 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,6 @@ def get_column(self, name: str) -> Column:
"""
Get a column from the table.

**Note:** This operation must fully load the data into memory, which can be expensive.

Parameters
----------
name:
Expand Down Expand Up @@ -584,7 +582,9 @@ def get_column(self, name: str) -> Column:
+-----+
"""
_check_columns_exist(self, name)
return Column._from_polars_series(self._data_frame.get_column(name))
return Column._from_polars_series(
self._lazy_frame.select(name).collect().get_column(name),
)

def get_column_type(self, name: str) -> DataType:
"""
Expand Down
3 changes: 2 additions & 1 deletion src/safeds/exceptions/_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def __init__(self) -> None:
class InputSizeError(Exception):
    """
    Raised when the number of features passed to a network does not match its input size.

    Parameters
    ----------
    data_size:
        The size (feature count or image size) of the data being fed to the network.
    input_layer_size:
        The input size declared by the network's input layer, or None.
        # TODO: remove input_layer_size type None again
    """

    # Annotations are quoted so the class stays importable even when
    # ModelImageSize is only available under TYPE_CHECKING.
    def __init__(self, data_size: "int | ModelImageSize", input_layer_size: "int | ModelImageSize | None") -> None:
        super().__init__(
            f"The data size being passed to the network({data_size}) does not match with its input size({input_layer_size}). Consider changing the data size of the model or reformatting the data.",
        )
Expand Down
127 changes: 56 additions & 71 deletions src/safeds/ml/nn/_model.py

Large diffs are not rendered by default.

32 changes: 9 additions & 23 deletions src/safeds/ml/nn/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,29 @@

if TYPE_CHECKING:
from ._input_converter import InputConversion
from ._input_converter_image import InputConversionImage
from ._input_converter_image_to_column import InputConversionImageToColumn
from ._input_converter_image_to_image import InputConversionImageToImage
from ._input_converter_image_to_table import InputConversionImageToTable
from ._input_converter_table import InputConversionTable
from ._input_converter_time_series import InputConversionTimeSeries
from ._output_converter import OutputConversion
from ._output_converter_image import (
OutputConversionImageToColumn,
OutputConversionImageToImage,
OutputConversionImageToTable,
)
from ._output_converter_table import OutputConversionTable
from ._output_converter_time_series import OutputConversionTimeSeries

apipkg.initpkg(
__name__,
{
"InputConversion": "._input_converter:InputConversion",
"InputConversionImage": "._input_converter_image:InputConversionImage",
"InputConversionImageToColumn": "._input_converter_image_to_column:InputConversionImageToColumn",
"InputConversionImageToImage": "._input_converter_image_to_image:InputConversionImageToImage",
"InputConversionImageToTable": "._input_converter_image_to_table:InputConversionImageToTable",
"InputConversionTable": "._input_converter_table:InputConversionTable",
"InputConversionTimeSeries": "._input_converter_time_series:InputConversionTimeSeries",
"OutputConversion": "._output_converter:OutputConversion",
"OutputConversionImageToColumn": "._output_converter_image:OutputConversionImageToColumn",
"OutputConversionImageToImage": "._output_converter_image:OutputConversionImageToImage",
"OutputConversionImageToTable": "._output_converter_image:OutputConversionImageToTable",
"OutputConversionTable": "._output_converter_table:OutputConversionTable",
"OutputConversionTimeSeries": "._output_converter_time_series:OutputConversionTimeSeries",
},
)

__all__ = [
"InputConversion",
"InputConversionImage",
"InputConversionImageToColumn",
"InputConversionImageToImage",
"InputConversionImageToTable",
"InputConversionTable",
"InputConversionTimeSeries",
"OutputConversion",
"OutputConversionImageToColumn",
"OutputConversionImageToImage",
"OutputConversionImageToTable",
"OutputConversionTable",
"OutputConversionTimeSeries",
]
22 changes: 10 additions & 12 deletions src/safeds/ml/nn/converters/_input_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from safeds.data.tabular.containers import Table

if TYPE_CHECKING:
from torch import Tensor
from torch.utils.data import DataLoader

from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
Expand All @@ -22,30 +23,27 @@ class InputConversion(Generic[FT, PT], ABC):

@property
@abstractmethod
def _data_size(self) -> int | ModelImageSize:
pass # pragma: no cover
def _data_size(self) -> int | ModelImageSize: ...

@abstractmethod
def _data_conversion_fit(
self,
input_data: FT,
batch_size: int,
num_of_classes: int = 1,
) -> DataLoader | ImageDataset:
pass # pragma: no cover
) -> DataLoader | ImageDataset: ...

@abstractmethod
def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | _SingleSizeImageList:
pass # pragma: no cover
def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | _SingleSizeImageList: ...

@abstractmethod
def _is_fit_data_valid(self, input_data: FT) -> bool:
pass # pragma: no cover
def _data_conversion_output(self, input_data: PT, output_data: Tensor, **kwargs: Any) -> FT: ...

@abstractmethod
def _is_predict_data_valid(self, input_data: PT) -> bool:
pass # pragma: no cover
def _is_fit_data_valid(self, input_data: FT) -> bool: ...

@abstractmethod
def _get_output_configuration(self) -> dict[str, Any]:
pass # pragma: no cover
def _is_predict_data_valid(self, input_data: PT) -> bool: ...

@abstractmethod
def _get_output_configuration(self) -> dict[str, Any]: ...
114 changes: 43 additions & 71 deletions src/safeds/ml/nn/converters/_input_converter_image.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import sys
from abc import ABC
from typing import TYPE_CHECKING, Any

from safeds._utils import _structural_hash
Expand All @@ -16,25 +17,57 @@
from safeds.ml.nn.typing import ModelImageSize


class InputConversionImage(InputConversion[ImageDataset, ImageList]):
"""The input conversion for a neural network, defines the input parameters for the neural network."""
class _InputConversionImage(InputConversion[ImageDataset, ImageList], ABC):
"""
The input conversion for a neural network, defines the input parameters for the neural network.

Parameters
----------
image_size:
the size of the input images
"""

def __init__(self, image_size: ModelImageSize) -> None:
"""
Define the input parameters for the neural network in the input conversion.

Parameters
----------
image_size:
the size of the input images
"""
self._input_size = image_size
self._output_size: ModelImageSize | int | None = None
self._one_hot_encoder: OneHotEncoder | None = None
self._column_name: str | None = None
self._column_names: list[str] | None = None
self._output_type: type | None = None

def __hash__(self) -> int:
return _structural_hash(
self.__class__.__name__,
self._input_size,
self._output_size,
self._one_hot_encoder,
self._column_name,
self._column_names,
self._output_type,
)

def __eq__(self, other: object) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return (self is other) or (
self._input_size == other._input_size
and self._output_size == other._output_size
and self._one_hot_encoder == other._one_hot_encoder
and self._column_name == other._column_name
and self._column_names == other._column_names
and self._output_type == other._output_type
)

def __sizeof__(self) -> int:
return (
sys.getsizeof(self._input_size)
+ sys.getsizeof(self._output_size)
+ sys.getsizeof(self._one_hot_encoder)
+ sys.getsizeof(self._column_name)
+ sys.getsizeof(self._column_names)
+ sys.getsizeof(self._output_type)
)

@property
def _data_size(self) -> ModelImageSize:
return self._input_size
Expand Down Expand Up @@ -81,64 +114,3 @@ def _get_output_configuration(self) -> dict[str, Any]:
"column_name": self._column_name,
"one_hot_encoder": self._one_hot_encoder,
}

def __hash__(self) -> int:
"""
Return a deterministic hash value for this InputConversionImage.

Returns
-------
hash:
the hash value
"""
return _structural_hash(
self._input_size,
self._output_size,
self._one_hot_encoder,
self._column_name,
self._column_names,
self._output_type,
)

def __eq__(self, other: object) -> bool:
"""
Compare two InputConversionImage instances.

Parameters
----------
other:
The InputConversionImage instance to compare to.

Returns
-------
equals:
Whether the instances are the same.
"""
if not isinstance(other, InputConversionImage):
return NotImplemented
return (self is other) or (
self._input_size == other._input_size
and self._output_size == other._output_size
and self._one_hot_encoder == other._one_hot_encoder
and self._column_name == other._column_name
and self._column_names == other._column_names
and self._output_type == other._output_type
)

def __sizeof__(self) -> int:
"""
Return the complete size of this object.

Returns
-------
size:
Size of this object in bytes.
"""
return (
sys.getsizeof(self._input_size)
+ sys.getsizeof(self._output_size)
+ sys.getsizeof(self._one_hot_encoder)
+ sys.getsizeof(self._column_name)
+ sys.getsizeof(self._column_names)
+ sys.getsizeof(self._output_type)
)
55 changes: 55 additions & 0 deletions src/safeds/ml/nn/converters/_input_converter_image_to_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from safeds._config import _init_default_device
from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
from safeds.data.labeled.containers import ImageDataset
from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor
from safeds.data.tabular.containers import Column
from safeds.data.tabular.transformation import OneHotEncoder

from ._input_converter_image import _InputConversionImage

if TYPE_CHECKING:
from torch import Tensor

from safeds.data.image.containers import ImageList


class InputConversionImageToColumn(_InputConversionImage):
    """Input conversion that turns a network's output tensor into an ImageDataset labelled with a Column."""

    def _data_conversion_output(
        self,
        input_data: ImageList,
        output_data: Tensor,
        **kwargs: Any,
    ) -> ImageDataset[Column]:
        """
        Convert predicted class indices back into an ImageDataset with a one-hot-encoded Column output.

        Parameters
        ----------
        input_data:
            The images the prediction was made for; must all share one size.
        output_data:
            Tensor of predicted class indices, one per image.
        **kwargs:
            Must contain `column_name` (str) and `one_hot_encoder` (OneHotEncoder).

        Raises
        ------
        ValueError
            If the images have different sizes, or if `column_name` / `one_hot_encoder`
            are missing from kwargs or have the wrong type.
        """
        import torch

        _init_default_device()

        if not isinstance(input_data, _SingleSizeImageList):
            raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004
        # `kwargs.get` returns None for missing keys, so a single isinstance check
        # covers both "missing" and "wrong type".
        if not isinstance(kwargs.get("column_name"), str):
            raise ValueError(
                "The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.",
            )
        if not isinstance(kwargs.get("one_hot_encoder"), OneHotEncoder):
            raise ValueError(
                "The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.",
            )
        one_hot_encoder: OneHotEncoder = kwargs["one_hot_encoder"]
        column_name: str = kwargs["column_name"]

        # Expand the predicted class indices into a one-hot matrix whose columns
        # match the encoder's generated column names.
        sample_count = len(input_data)
        output = torch.zeros(sample_count, len(one_hot_encoder._get_names_of_added_columns()))
        output[torch.arange(sample_count), output_data] = 1

        # Assemble the dataset via __new__ to skip ImageDataset.__init__ and its
        # validation — the tensors here are already consistent by construction.
        im_dataset: ImageDataset[Column] = ImageDataset[Column].__new__(ImageDataset)
        im_dataset._output = _ColumnAsTensor._from_tensor(output, column_name, one_hot_encoder)
        im_dataset._shuffle_tensor_indices = torch.LongTensor(list(range(sample_count)))
        im_dataset._shuffle_after_epoch = False
        im_dataset._batch_size = 1
        im_dataset._next_batch_index = 0
        im_dataset._input_size = input_data.sizes[0]
        im_dataset._input = input_data
        return im_dataset
36 changes: 36 additions & 0 deletions src/safeds/ml/nn/converters/_input_converter_image_to_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from safeds._config import _init_default_device
from safeds.data.image.containers import ImageList
from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
from safeds.data.labeled.containers import ImageDataset

from ._input_converter_image import _InputConversionImage

if TYPE_CHECKING:
from torch import Tensor


class InputConversionImageToImage(_InputConversionImage):
    """Input conversion that turns a network's output tensor back into an ImageDataset of images."""

    def _data_conversion_output(
        self,
        input_data: ImageList,
        output_data: Tensor,
        **_kwargs: Any,
    ) -> ImageDataset[ImageList]:
        """
        Convert the network's float output tensor into an ImageDataset pairing inputs with predicted images.

        Parameters
        ----------
        input_data:
            The images the prediction was made for; must all share one size.
        output_data:
            Float tensor of predicted images; values are assumed to lie in
            [0, 1] — TODO confirm against the model's output activation.

        Raises
        ------
        ValueError
            If the input images have different sizes.
        """
        import torch

        _init_default_device()

        if not isinstance(input_data, _SingleSizeImageList):
            raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004

        # Scale float output to the 0-255 byte range and wrap it as a
        # single-size image list, one image per output-tensor slice.
        predicted_images = _SingleSizeImageList._create_from_tensor(
            (output_data * 255).to(torch.uint8),
            list(range(output_data.size(dim=0))),
        )
        return ImageDataset[ImageList](input_data, predicted_images)
Loading