Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: join #870

Merged
merged 42 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
1b37cd6
issue #750 regularization strength for logistic classifier
Jun 21, 2024
4081bca
first try
Jun 28, 2024
4fdface
finished ISSUE 745 join
Jun 28, 2024
7fb0b7a
import warning fixed
Jun 28, 2024
08ffdb2
linter problems solved
Jun 28, 2024
c02dce6
Merge branch 'main' into 745-join
zogomii Jun 28, 2024
509c07d
linter problems
Jun 28, 2024
440a89f
linter fixes #2
Jun 28, 2024
bf990a5
linter fixes #3
Jun 28, 2024
44f3210
linter fixes #4
Jun 28, 2024
f6ce318
style: apply automated linter fixes
megalinter-bot Jun 28, 2024
76561ec
style: apply automated linter fixes
megalinter-bot Jun 28, 2024
3ce2aca
Merge branch 'refs/heads/main' into 745-join
lars-reimann Jun 29, 2024
ac6b862
perf: use lazy frames instead of data frames
lars-reimann Jun 29, 2024
22faea8
documentation for join
Jul 5, 2024
af60c7e
style: apply automated linter fixes
megalinter-bot Jul 5, 2024
7119cf5
small changes in documentation - join
Jul 5, 2024
b54ea09
test doc join
Jul 5, 2024
2251f9f
test rückgängig
Jul 5, 2024
9224097
small changes
Jul 12, 2024
0776679
Merge branch 'main' into 745-join
Jul 12, 2024
4abd576
changed None to null in documentation
Jul 12, 2024
86a110b
formatierung
Jul 12, 2024
d550f95
Validation
Jul 12, 2024
3e96f55
formatierungsfehler gefixed
Jul 12, 2024
bbb3576
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
eab9833
tests for validation
Jul 12, 2024
099aaff
correction
Jul 12, 2024
55d5d2f
linter
Jul 12, 2024
1e7af57
literal mode
Jul 12, 2024
2b2962b
removed mode
Jul 12, 2024
524df73
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
eafb51c
columnNotFound
Jul 12, 2024
10b26fa
import columnNotFoundError
Jul 12, 2024
4148512
style: apply automated linter fixes
megalinter-bot Jul 12, 2024
ce8aa8d
docstrings are not redundant anymore
Jul 18, 2024
57abcee
tests for check column exists
Jul 18, 2024
de5de4c
tests were already there so we deleted the redundant tests
Jul 18, 2024
2bf76fc
style: apply automated linter fixes
megalinter-bot Jul 18, 2024
0d5edc6
test: rename tests to match convention
lars-reimann Jul 19, 2024
f65f20e
test: parametrize test
lars-reimann Jul 19, 2024
172bbb8
test: parametrize test
lars-reimann Jul 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,6 +1708,65 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer
"""
return fitted_transformer.inverse_transform(self)

def join(
    self,
    right_table: Table,
    left_names: str | list[str],
    right_names: str | list[str],
    *,
    mode: Literal["inner", "left", "outer"] = "inner",
) -> Table:
    """
    Join a table with the current table and return the result.

    Parameters
    ----------
    right_table:
        The other table which is to be joined to the current table.
    left_names:
        Name or list of names of columns from the current table on which to join right_table.
    right_names:
        Name or list of names of columns from right_table on which to join the current table.
    mode:
        Specify which type of join you want to use. Options are 'inner', 'left', and 'outer'.

    Returns
    -------
    new_table:
        The table with the joined table.

    Raises
    ------
    ColumnNotFoundError
        If a name in left_names does not exist in the current table or a name in right_names does not exist in
        right_table.
    ValueError
        If left_names and right_names do not contain the same number of column names.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table1 = Table({"a": [1, 2], "b": [3, 4]})
    >>> table2 = Table({"d": [1, 5], "e": [5, 6]})
    >>> table1.join(table2, "a", "d", mode="left")
    +-----+-----+------+
    |   a |   b |    e |
    | --- | --- |  --- |
    | i64 | i64 |  i64 |
    +==================+
    |   1 |   3 |    5 |
    |   2 |   4 | null |
    +-----+-----+------+
    """
    # Preprocessing: a single column name is wrapped in a list, so the length check below compares the number of
    # join columns rather than the number of characters of two strings.
    if isinstance(left_names, str):
        left_names = [left_names]
    if isinstance(right_names, str):
        right_names = [right_names]

    # Validation
    _check_columns_exist(self, left_names)
    _check_columns_exist(right_table, right_names)

    if len(left_names) != len(right_names):
        raise ValueError("The number of columns to join on must be the same in both tables.")

    # Implementation
    return self._from_polars_lazy_frame(
        self._lazy_frame.join(
            right_table._lazy_frame,
            left_on=left_names,
            right_on=right_names,
            how=mode,
        ),
    )

def transform_table(self, fitted_transformer: TableTransformer) -> Table:
"""
Return a new table transformed by a **fitted** transformer.
Expand Down
91 changes: 91 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from typing import Literal

import pytest
from safeds.data.tabular.containers import Table
from safeds.exceptions import ColumnNotFoundError


@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names", "mode", "table_expected"),
    [
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "outer",
            Table({"a": [1, None, 2], "b": [3, None, 4], "d": [1, 5, None], "e": [5, 6, None]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "left",
            Table({"a": [1, 2], "b": [3, 4], "e": [5, None]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "inner",
            Table({"a": [1], "b": [3], "e": [5]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4], "c": [5, 6]}),
            Table({"d": [1, 5], "e": [5, 6], "g": [7, 9]}),
            ["a", "c"],
            ["d", "e"],
            "inner",
            Table({"a": [1], "b": [3], "c": [5], "g": [7]}),
        ),
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["b"],
            ["e"],
            "inner",
            Table({"a": [], "b": [], "d": []}),
        ),
        # Same scenario as the docstring example of `join`: column names given as plain strings.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            "a",
            "d",
            "left",
            Table({"a": [1, 2], "b": [3, 4], "e": [5, None]}),
        ),
    ],
    ids=[
        "outer_join",
        "left_join",
        "inner_join",
        "inner_join_multiple_columns",
        "inner_join_no_matches",
        "left_join_string_names",
    ],
)
def test_should_join_two_tables(
    table_left: Table,
    table_right: Table,
    left_names: str | list[str],
    right_names: str | list[str],
    mode: Literal["inner", "left", "outer"],
    table_expected: Table,
) -> None:
    """Joining two tables on the given columns with the given mode yields the expected table."""
    assert table_left.join(table_right, left_names, right_names, mode=mode) == table_expected


def test_should_raise_if_columns_are_mismatched() -> None:
    """Joining on a different number of columns per table must raise a ValueError."""
    table_left = Table({"a": [1, 2], "b": [3, 4]})
    table_right = Table({"d": [1, 5], "e": [5, 6]})
    left_names = ["a"]
    right_names = ["d", "e"]
    # `match` is a regular expression, so the trailing dot is escaped to match a literal "." only.
    with pytest.raises(
        ValueError,
        match=r"The number of columns to join on must be the same in both tables\.",
    ):
        table_left.join(table_right, left_names, right_names)


@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names"),
    [
        pytest.param(
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["c"],
            ["d"],
            id="wrong_left_name",
        ),
        pytest.param(
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["f"],
            id="wrong_right_name",
        ),
    ],
)
def test_should_raise_if_columns_are_missing(
    table_left: Table,
    table_right: Table,
    left_names: list[str],
    right_names: list[str],
) -> None:
    """Joining on a column name that is absent from its table must raise a ColumnNotFoundError."""
    with pytest.raises(ColumnNotFoundError):
        table_left.join(
            table_right,
            left_names=left_names,
            right_names=right_names,
        )