Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added Table.group_by to group a table by a given key #343

Merged
merged 2 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import io
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, TypeVar

import matplotlib.pyplot as plt
import numpy as np
Expand Down Expand Up @@ -717,6 +717,32 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table:
result_table = self.from_rows(rows)
return result_table

# Type of the group key produced by the ``key_selector`` passed to ``group_by``.
_T = TypeVar("_T")

def group_by(self, key_selector: Callable[[Row], _T]) -> dict[_T, Table]:
    """
    Return a dictionary with the output tables as values and the keys from the key_selector.

    The selector is evaluated exactly once per row. Rows for which it returns equal keys end up in the same output
    table, in the order in which they are yielded by `to_rows`. The keys are used as dictionary keys, so they must be
    hashable. Groups appear in the dictionary in the order in which their key is first encountered.

    This table is not modified.

    Parameters
    ----------
    key_selector : Callable[[Row], _T]
        A Callable that is applied to all rows and returns the key of the group.

    Returns
    -------
    dictionary : dict
        A dictionary containing the new tables as values and the selected keys as keys. It is empty if this table
        yields no rows.
    """
    dictionary: dict[Table._T, Table] = {}
    for row in self.to_rows():
        # Compute the key once and reuse it: calling the user-supplied selector repeatedly for the same row is
        # wasteful and, for a selector that is not a pure function, could file the row under a different key than
        # the one that was just tested for membership.
        key = key_selector(row)
        if key in dictionary:
            # The result of add_row is stored back, so the group is replaced by the extended table.
            dictionary[key] = dictionary[key].add_row(row)
        else:
            # First row seen for this key: start a new single-row table.
            dictionary[key] = Table.from_rows([row])
    return dictionary

def keep_only_columns(self, column_names: list[str]) -> Table:
"""
Return a table with only the given column(s).
Expand Down
35 changes: 35 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_group_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from collections.abc import Callable

import pytest
from safeds.data.tabular.containers import Table


@pytest.mark.parametrize(
    ("table", "selector", "expected"),
    [
        # Homogeneous columns: rows sharing a "col1" value land in the same group.
        pytest.param(
            Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", "e"]}),
            lambda row: row["col1"],
            {
                1: Table({"col1": [1, 1], "col2": ["a", "b"]}),
                2: Table({"col1": [2, 2], "col2": ["c", "d"]}),
                3: Table({"col1": [3], "col2": ["e"]}),
            },
            id="select by row1",
        ),
        # "col2" mixes strings with an int; the int ends up alone in its group.
        pytest.param(
            Table({"col1": [1, 1, 2, 2, 3], "col2": ["a", "b", "c", "d", 2]}),
            lambda row: row["col1"],
            {
                1: Table({"col1": [1, 1], "col2": ["a", "b"]}),
                2: Table({"col1": [2, 2], "col2": ["c", "d"]}),
                3: Table({"col1": [3], "col2": [2]}),
            },
            id="different types in column",
        ),
        # No columns and no rows: nothing to group.
        pytest.param(
            Table(),
            lambda row: row["col1"],
            {},
            id="empty table",
        ),
        # Columns exist but hold no rows: still nothing to group.
        pytest.param(
            Table({"col1": [], "col2": []}),
            lambda row: row["col1"],
            {},
            id="table with no rows",
        ),
    ],
)
def test_group_by(table: Table, selector: Callable, expected: dict) -> None:
    """Check that ``Table.group_by`` maps each selected key to the expected table."""
    assert table.group_by(selector) == expected