Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: join #870

Merged
merged 42 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
1b37cd6
issue #750 regularization strength for logistic classifier
Jun 21, 2024
4081bca
first try
Jun 28, 2024
4fdface
finished ISSUE 745 join
Jun 28, 2024
7fb0b7a
import warning fixed
Jun 28, 2024
08ffdb2
linter problems solved
Jun 28, 2024
c02dce6
Merge branch 'main' into 745-join
zogomii Jun 28, 2024
509c07d
linter problems
Jun 28, 2024
440a89f
linter fixes #2
Jun 28, 2024
bf990a5
linter fixes #3
Jun 28, 2024
44f3210
linter fixes #4
Jun 28, 2024
f6ce318
style: apply automated linter fixes
megalinter-bot Jun 28, 2024
76561ec
style: apply automated linter fixes
megalinter-bot Jun 28, 2024
3ce2aca
Merge branch 'refs/heads/main' into 745-join
lars-reimann Jun 29, 2024
ac6b862
perf: use lazy frames instead of data frames
lars-reimann Jun 29, 2024
22faea8
documentation for join
Jul 5, 2024
af60c7e
style: apply automated linter fixes
megalinter-bot Jul 5, 2024
7119cf5
small changes in documentation - join
Jul 5, 2024
b54ea09
test doc join
Jul 5, 2024
2251f9f
test rückgängig
Jul 5, 2024
9224097
small changes
Jul 12, 2024
0776679
Merge branch 'main' into 745-join
Jul 12, 2024
4abd576
changed None to null in documentation
Jul 12, 2024
86a110b
formatierung
Jul 12, 2024
d550f95
Validation
Jul 12, 2024
3e96f55
formatierungsfehler gefixed
Jul 12, 2024
bbb3576
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
eab9833
tests for validation
Jul 12, 2024
099aaff
correction
Jul 12, 2024
55d5d2f
linter
Jul 12, 2024
1e7af57
literal mode
Jul 12, 2024
2b2962b
removed mode
Jul 12, 2024
524df73
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
eafb51c
columnNotFound
Jul 12, 2024
10b26fa
import columnNotFoundError
Jul 12, 2024
4148512
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
ce8aa8d
docstrings are not redundant anymore
Jul 18, 2024
57abcee
tests for check column exists
Jul 18, 2024
de5de4c
tests were already there so we deleted the redundant tests
Jul 18, 2024
2bf76fc
style: apply automated linter fixes
megalinter-bot Jul 18, 2024
0d5edc6
test: rename tests to match convention
lars-reimann Jul 19, 2024
f65f20e
test: parametrize test
lars-reimann Jul 19, 2024
172bbb8
test: parametrize test
lars-reimann Jul 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,6 +1708,22 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer
"""
return fitted_transformer.inverse_transform(self)

def join(
    self,
    right_table: Table,
    left_names: str | list[str],
    right_names: str | list[str],
    *,
    mode: Literal["inner", "left", "outer"] = "inner",
) -> Table:
    """
    Join this table (the left side) with another table and return the result as a new table.

    Rows are matched by comparing the columns named in `left_names` (taken from this table) with the columns named
    in `right_names` (taken from `right_table`), pairwise and in order.

    Parameters
    ----------
    right_table:
        The table to join with this one.
    left_names:
        Name or list of names of the key columns in this table.
    right_names:
        Name or list of names of the key columns in `right_table`. Must name as many columns as `left_names`.
    mode:
        Join strategy, forwarded unchanged to the underlying data frame's `join` as `how`. One of "inner", "left",
        or "outer".

    Returns
    -------
    joined_table:
        A new table wrapping the joined data frame.

    Raises
    ------
    ValueError
        If `left_names` and `right_names` do not name the same number of columns.
    """
    # A bare string names exactly one key column; count it as such so that "a" and ["d"] are accepted as a pair.
    left_key_count = 1 if isinstance(left_names, str) else len(left_names)
    right_key_count = 1 if isinstance(right_names, str) else len(right_names)

    # Fail early with a clear message instead of surfacing a backend-specific error from the data frame library.
    if left_key_count != right_key_count:
        raise ValueError("The number of columns to join on must be the same in both tables.")

    joined_dataframe = self._data_frame.join(
        right_table._data_frame,
        left_on=left_names,
        right_on=right_names,
        how=mode,
    )
    return self._from_polars_data_frame(joined_dataframe)

def transform_table(self, fitted_transformer: TableTransformer) -> Table:
"""
Return a new table transformed by a **fitted** transformer.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ class LogisticClassifier(Classifier):
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self) -> None:
def __init__(self, c: float = 1.0) -> None:
super().__init__()
self.c = c

def __hash__(self) -> int:
return _structural_hash(
Expand All @@ -30,12 +31,13 @@ def __hash__(self) -> int:
# ------------------------------------------------------------------------------------------------------------------

def _clone(self) -> LogisticClassifier:
return LogisticClassifier()
return LogisticClassifier(c=self.c)

def _get_sklearn_model(self) -> ClassifierMixin:
    """
    Build the scikit-learn estimator that backs this classifier.

    Returns
    -------
    model:
        A scikit-learn `LogisticRegression` configured with the seed returned by `_get_random_seed()`, `n_jobs=-1`,
        and this classifier's `c` forwarded as the `C` argument.
    """
    # The scikit-learn import stays local to this method, exactly as before.
    from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

    sklearn_model = SklearnLogisticRegression(
        random_state=_get_random_seed(),
        n_jobs=-1,
        C=self.c,
    )
    return sklearn_model
60 changes: 60 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from typing import Literal

import pytest
from safeds.data.tabular.containers import Table


@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names", "mode", "table_expected"),
    [
        # outer join on a single key: unmatched rows from both sides are kept and padded with None
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "outer",
            Table({"a": [1, None, 2], "b": [3, None, 4], "d": [1, 5, None], "e": [5, 6, None]}),
        ),
        # left join on a single key: every left row is kept, missing right values become None
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "left",
            Table({"a": [1, 2], "b": [3, 4], "e": [5, None]}),
        ),
        # inner join on a single key: only the matching row remains
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "inner",
            Table({"a": [1], "b": [3], "e": [5]}),
        ),
        # inner join on two keys at once
        (
            Table({"a": [1, 2], "b": [3, 4], "c": [5, 6]}),
            Table({"d": [1, 5], "e": [5, 6], "g": [7, 9]}),
            ["a", "c"],
            ["d", "e"],
            "inner",
            Table({"a": [1], "b": [3], "c": [5], "g": [7]}),
        ),
        # inner join without any matching key values: the result has columns but no rows
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["b"],
            ["e"],
            "inner",
            Table({"a": [], "b": [], "d": []}),
        ),
    ],
)
def test_join(
    table_left: Table,
    table_right: Table,
    left_names: list[str],
    right_names: list[str],
    mode: Literal["inner", "left", "outer"],
    table_expected: Table,
) -> None:
    """Check that joining two tables on the given key columns yields the expected table for each mode."""
    joined = table_left.join(table_right, left_names, right_names, mode=mode)
    assert joined == table_expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest
from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import LogisticClassifier


@pytest.fixture()
def training_set() -> TabularDataset:
    """Provide a four-row tabular dataset with feature column "col2" and target column "col1"."""
    return Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}).to_tabular_dataset(target_name="col1")


class TestC:
    """Tests for the `c` hyperparameter of `LogisticClassifier`."""

    def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None:
        """The fitted classifier must still expose the `c` it was constructed with."""
        classifier = LogisticClassifier(c=2)
        fitted = classifier.fit(training_set)
        assert fitted.c == 2

    def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None:
        """The wrapped scikit-learn model must receive `c` as its `C` attribute."""
        classifier = LogisticClassifier(c=2)
        fitted = classifier.fit(training_set)
        wrapped = fitted._wrapped_model
        assert wrapped is not None
        assert wrapped.C == 2

    def test_clone(self, training_set: TabularDataset) -> None:
        """Cloning a fitted classifier must yield a `LogisticClassifier` with the same `c`."""
        classifier = LogisticClassifier(c=2)
        fitted = classifier.fit(training_set)
        clone = fitted._clone()
        assert isinstance(clone, LogisticClassifier)
        assert clone.c == fitted.c