From 74045d7ccd1aac8c1b844cf47fc93e413fe4368f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 2 Jun 2023 13:16:49 +0200 Subject: [PATCH 1/2] feat: Added `Table.group_by` to group a table by a given key Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com> --- src/safeds/data/tabular/containers/_table.py | 28 ++++++++++++- .../containers/_table/test_group_by.py | 39 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/containers/_table/test_group_by.py diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 99709faea..9a2886253 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -4,7 +4,7 @@ import io import warnings from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, TypeVar import matplotlib.pyplot as plt import numpy as np @@ -717,6 +717,32 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table: result_table = self.from_rows(rows) return result_table + _T = TypeVar('_T') + + def group_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]: + """ + Return a dictionary with the output tables as values and the keys from the key_selector. + + This table is not modified. + + Parameters + ---------- + key_selector : Callable[[Row], _T] + A Callable that is applied to all rows and returns the key of the group. + + Returns + ------- + dictionary : dict + A dictionary containing the new tables as values and the selected keys as keys. + """ + dictionary: dict[Table._T, Table] = dict() + for row in self.to_rows(): + if key_selector(row) in dictionary.keys(): + dictionary[key_selector(row)] = dictionary[key_selector(row)].add_row(row) + else: + dictionary[key_selector(row)] = Table.from_rows([row]) + return dictionary + def keep_only_columns(self, column_names: list[str]) -> Table: """ Return a table with only the given column(s). diff --git a/tests/safeds/data/tabular/containers/_table/test_group_by.py b/tests/safeds/data/tabular/containers/_table/test_group_by.py new file mode 100644 index 000000000..2104b3549 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/test_group_by.py @@ -0,0 +1,39 @@ +from typing import Callable + +import pytest + +from safeds.data.tabular.containers import Table + + +@pytest.mark.parametrize( + ("table", "selector", "expected"), + [ + ( + Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", "e"]}), + lambda row: row["col1"], + {1: Table({"col1": [1, 1], "col2": ["a", "b"]}), 2: Table({"col1": [2, 2], "col2": ["c", "d"]}), + 3: Table({"col1": [3], "col2": ["e"]})} + ), + ( + Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", 2]}), + lambda row: row["col1"], + {1: Table({"col1": [1, 1], "col2": ["a", "b"]}), 2: Table({"col1": [2, 2], "col2": ["c", "d"]}), + 3: Table({"col1": [3], "col2": [2]})} + ), + ( + Table(), + lambda row: row["col1"], + {} + ), + ( + Table({"col1": [], "col2": []}), + lambda row: row["col1"], + {} + ) + ], + ids=["select by row1", "different types in column", "empty table", "table with no rows"] +) +def test_group_by(table: Table, selector: Callable, expected: dict) -> None: + out = table.group_by(selector) + assert out == expected + From e4c5882bf6b6c11bf992323e8a1fbce069ca36e4 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 2 Jun 2023 11:22:37 +0000 Subject: [PATCH 2/2] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_table.py | 6 ++-- .../containers/_table/test_group_by.py | 32 ++++++++----------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 9a2886253..4dc619a35 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -717,7 +717,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table: result_table = self.from_rows(rows) return result_table - _T = TypeVar('_T') + _T = TypeVar("_T") def group_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]: """ @@ -735,9 +735,9 @@ def group_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]: dictionary : dict A dictionary containing the new tables as values and the selected keys as keys. """ - dictionary: dict[Table._T, Table] = dict() + dictionary: dict[Table._T, Table] = {} for row in self.to_rows(): - if key_selector(row) in dictionary.keys(): + if key_selector(row) in dictionary: dictionary[key_selector(row)] = dictionary[key_selector(row)].add_row(row) else: dictionary[key_selector(row)] = Table.from_rows([row]) diff --git a/tests/safeds/data/tabular/containers/_table/test_group_by.py b/tests/safeds/data/tabular/containers/_table/test_group_by.py index 2104b3549..cfb38db36 100644 --- a/tests/safeds/data/tabular/containers/_table/test_group_by.py +++ b/tests/safeds/data/tabular/containers/_table/test_group_by.py @@ -1,7 +1,6 @@ -from typing import Callable +from collections.abc import Callable import pytest - from safeds.data.tabular.containers import Table @@ -11,29 +10,26 @@ ( Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", "e"]}), lambda row: row["col1"], - {1: Table({"col1": [1, 1], "col2": ["a", "b"]}), 2: Table({"col1": [2, 2], "col2": ["c", "d"]}), - 3: Table({"col1": [3], "col2": ["e"]})} + { + 1: Table({"col1": [1, 1], "col2": ["a", "b"]}), + 2: Table({"col1": [2, 2], "col2": ["c", "d"]}), + 3: Table({"col1": [3], "col2": ["e"]}), + }, ), ( Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", 2]}), lambda row: row["col1"], - {1: Table({"col1": [1, 1], "col2": ["a", "b"]}), 2: Table({"col1": [2, 2], "col2": ["c", "d"]}), - 3: Table({"col1": [3], "col2": [2]})} - ), - ( - Table(), - lambda row: row["col1"], - {} + { + 1: Table({"col1": [1, 1], "col2": ["a", "b"]}), + 2: Table({"col1": [2, 2], "col2": ["c", "d"]}), + 3: Table({"col1": [3], "col2": [2]}), + }, ), - ( - Table({"col1": [], "col2": []}), - lambda row: row["col1"], - {} - ) + (Table(), lambda row: row["col1"], {}), + (Table({"col1": [], "col2": []}), lambda row: row["col1"], {}), ], - ids=["select by row1", "different types in column", "empty table", "table with no rows"] + ids=["select by row1", "different types in column", "empty table", "table with no rows"], ) def test_group_by(table: Table, selector: Callable, expected: dict) -> None: out = table.group_by(selector) assert out == expected -