diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 1b678f8e..ddc68273 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -4,6 +4,8 @@ Release Notes Upcoming Version ---------------- +* The group dimension when grouping by a pandas dataframe is now always `group`. This fixes the case where the dataframe contains a column named `name`. + Version 0.3.14 -------------- diff --git a/linopy/expressions.py b/linopy/expressions.py index b75f66f4..49a7240d 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -148,12 +148,17 @@ def groupby(self) -> xarray.core.groupby.DatasetGroupBy: xarray.core.groupby.DataArrayGroupBy The groupby object. """ - if isinstance(self.group, (pd.Series, pd.DataFrame)): + if isinstance(self.group, pd.DataFrame): raise ValueError( - "Grouping by pandas objects is only supported in sum function." + "Grouping by a DataFrame only supported for `sum` operation with `use_fallback=False`." ) + if isinstance(self.group, pd.Series): + group_name = self.group.name or "group" + group = DataArray(self.group, name=group_name) + else: + group = self.group # type: ignore - return self.data.groupby(group=self.group, **self.kwargs) + return self.data.groupby(group=group, **self.kwargs) def map( self, func: Callable, shortcut: bool = False, args: tuple[()] = (), **kwargs @@ -210,7 +215,11 @@ def sum(self, use_fallback: bool = False, **kwargs) -> LinearExpression: non_fallback_types = (pd.Series, pd.DataFrame, xr.DataArray) if isinstance(self.group, non_fallback_types) and not use_fallback: group: pd.Series | pd.DataFrame | xr.DataArray = self.group - group_name = getattr(group, "name", "group") or "group" + if isinstance(group, pd.DataFrame): + # dataframes do not have a name, so we need to set it + group_name = "group" + else: + group_name = getattr(group, "name", "group") or "group" if isinstance(group, DataArray): group = group.to_pandas() @@ -224,7 +233,9 @@ def sum(self, use_fallback: bool = False, **kwargs) -> 
LinearExpression: group_dim = group.index.name if group_name == group_dim: - raise ValueError("Group name cannot be the same as group dimension") + raise ValueError( + "Group name cannot be the same as group dimension in non-fallback mode." + ) arrays = [group, group.groupby(group).cumcount()] idx = pd.MultiIndex.from_arrays( diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index cdf12e55..05b9b280 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -690,42 +690,74 @@ def test_linear_expression_groupby_with_name(v, use_fallback): assert grouped.nterm == 10 -def test_linear_expression_groupby_with_series(v): +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_series(v, use_fallback): expr = 1 * v groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes["dim_2"]) - grouped = expr.groupby(groups).sum() + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) assert "group" in grouped.dims assert (grouped.data.group == [1, 2]).all() assert grouped.nterm == 10 -def test_linear_expression_groupby_with_series_false(v): +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_series_with_name(v, use_fallback): + expr = 1 * v + groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes[v.dims[0]], name="my_group") + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) + assert "my_group" in grouped.dims + assert (grouped.data.my_group == [1, 2]).all() + assert grouped.nterm == 10 + + +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_series_false(v, use_fallback): expr = 1 * v groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes["dim_2"]) groups.name = "dim_2" - with pytest.raises(ValueError): - expr.groupby(groups).sum() + if not use_fallback: + with pytest.raises(ValueError): + expr.groupby(groups).sum(use_fallback=use_fallback) + return + grouped = 
expr.groupby(groups).sum(use_fallback=use_fallback) + assert "dim_2" in grouped.dims + assert (grouped.data.dim_2 == [1, 2]).all() + assert grouped.nterm == 10 -def test_linear_expression_groupby_with_dataframe(v): +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_dataframe(v, use_fallback): expr = 1 * v groups = pd.DataFrame( {"a": [1] * 10 + [2] * 10, "b": list(range(4)) * 5}, index=v.indexes["dim_2"] ) - grouped = expr.groupby(groups).sum() + if use_fallback: + with pytest.raises(ValueError): + expr.groupby(groups).sum(use_fallback=use_fallback) + return + + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) index = pd.MultiIndex.from_frame(groups) assert "group" in grouped.dims assert set(grouped.data.group.values) == set(index.values) assert grouped.nterm == 3 -def test_linear_expression_groupby_with_dataarray(v): +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_dataarray(v, use_fallback): expr = 1 * v df = pd.DataFrame( {"a": [1] * 10 + [2] * 10, "b": list(range(4)) * 5}, index=v.indexes["dim_2"] ) groups = xr.DataArray(df) - grouped = expr.groupby(groups).sum() + + # this should not be the case, see https://github.com/PyPSA/linopy/issues/351 + if use_fallback: + with pytest.raises(KeyError): + expr.groupby(groups).sum(use_fallback=use_fallback) + return + + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) index = pd.MultiIndex.from_frame(df) assert "group" in grouped.dims assert set(grouped.data.group.values) == set(index.values)