Skip to content

Commit

Permalink
Merge pull request #352 from PyPSA/groupby-df-with-name-column
Browse files Browse the repository at this point in the history
groupby: fix pandas dataframe with column `name` as grouper
  • Loading branch information
FabianHofmann authored Sep 24, 2024
2 parents 705194a + 9e26384 commit b491595
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 14 deletions.
2 changes: 2 additions & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Release Notes
Upcoming Version
----------------

* When grouping by a pandas DataFrame, the resulting group dimension is now always named `group`. This fixes grouping by a DataFrame that contains a column named `name`.

Version 0.3.14
--------------

Expand Down
21 changes: 16 additions & 5 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,17 @@ def groupby(self) -> xarray.core.groupby.DatasetGroupBy:
xarray.core.groupby.DataArrayGroupBy
The groupby object.
"""
if isinstance(self.group, (pd.Series, pd.DataFrame)):
if isinstance(self.group, pd.DataFrame):
raise ValueError(
"Grouping by pandas objects is only supported in sum function."
"Grouping by a DataFrame only supported for `sum` operation with `use_fallback=False`."
)
if isinstance(self.group, pd.Series):
group_name = self.group.name or "group"
group = DataArray(self.group, name=group_name)
else:
group = self.group # type: ignore

return self.data.groupby(group=self.group, **self.kwargs)
return self.data.groupby(group=group, **self.kwargs)

def map(
self, func: Callable, shortcut: bool = False, args: tuple[()] = (), **kwargs
Expand Down Expand Up @@ -210,7 +215,11 @@ def sum(self, use_fallback: bool = False, **kwargs) -> LinearExpression:
non_fallback_types = (pd.Series, pd.DataFrame, xr.DataArray)
if isinstance(self.group, non_fallback_types) and not use_fallback:
group: pd.Series | pd.DataFrame | xr.DataArray = self.group
group_name = getattr(group, "name", "group") or "group"
if isinstance(group, pd.DataFrame):
# DataFrames have no `name` attribute of their own; `getattr(group, "name")`
# would instead return a column called `name`, so use a fixed group name
group_name = "group"
else:
group_name = getattr(group, "name", "group") or "group"

if isinstance(group, DataArray):
group = group.to_pandas()
Expand All @@ -224,7 +233,9 @@ def sum(self, use_fallback: bool = False, **kwargs) -> LinearExpression:

group_dim = group.index.name
if group_name == group_dim:
raise ValueError("Group name cannot be the same as group dimension")
raise ValueError(
"Group name cannot be the same as group dimension in non-fallback mode."
)

arrays = [group, group.groupby(group).cumcount()]
idx = pd.MultiIndex.from_arrays(
Expand Down
50 changes: 41 additions & 9 deletions test/test_linear_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,42 +690,74 @@ def test_linear_expression_groupby_with_name(v, use_fallback):
assert grouped.nterm == 10


def test_linear_expression_groupby_with_series(v):
@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_series(v, use_fallback):
expr = 1 * v
groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes["dim_2"])
grouped = expr.groupby(groups).sum()
grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
assert "group" in grouped.dims
assert (grouped.data.group == [1, 2]).all()
assert grouped.nterm == 10


def test_linear_expression_groupby_with_series_false(v):
@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_series_with_name(v, use_fallback):
expr = 1 * v
groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes[v.dims[0]], name="my_group")
grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
assert "my_group" in grouped.dims
assert (grouped.data.my_group == [1, 2]).all()
assert grouped.nterm == 10


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_series_false(v, use_fallback):
expr = 1 * v
groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes["dim_2"])
groups.name = "dim_2"
with pytest.raises(ValueError):
expr.groupby(groups).sum()
if not use_fallback:
with pytest.raises(ValueError):
expr.groupby(groups).sum(use_fallback=use_fallback)
return
grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
assert "dim_2" in grouped.dims
assert (grouped.data.dim_2 == [1, 2]).all()
assert grouped.nterm == 10


def test_linear_expression_groupby_with_dataframe(v):
@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_dataframe(v, use_fallback):
expr = 1 * v
groups = pd.DataFrame(
{"a": [1] * 10 + [2] * 10, "b": list(range(4)) * 5}, index=v.indexes["dim_2"]
)
grouped = expr.groupby(groups).sum()
if use_fallback:
with pytest.raises(ValueError):
expr.groupby(groups).sum(use_fallback=use_fallback)
return

grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
index = pd.MultiIndex.from_frame(groups)
assert "group" in grouped.dims
assert set(grouped.data.group.values) == set(index.values)
assert grouped.nterm == 3


def test_linear_expression_groupby_with_dataarray(v):
@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_dataarray(v, use_fallback):
expr = 1 * v
df = pd.DataFrame(
{"a": [1] * 10 + [2] * 10, "b": list(range(4)) * 5}, index=v.indexes["dim_2"]
)
groups = xr.DataArray(df)
grouped = expr.groupby(groups).sum()

# With use_fallback=True, grouping by this DataArray currently raises a KeyError.
# It should not; see https://github.com/PyPSA/linopy/issues/351
if use_fallback:
with pytest.raises(KeyError):
expr.groupby(groups).sum(use_fallback=use_fallback)
return

grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
index = pd.MultiIndex.from_frame(df)
assert "group" in grouped.dims
assert set(grouped.data.group.values) == set(index.values)
Expand Down

0 comments on commit b491595

Please sign in to comment.