From 4e08186660eb68fe9fee6e7290034f0ec2dd22c8 Mon Sep 17 00:00:00 2001
From: dcherian <deepak@cherian.net>
Date: Wed, 18 Jan 2023 11:33:19 -0700
Subject: [PATCH 1/3] Recognize grouping by IntervalIndex as binning

---
 flox/core.py         |  9 +++++++--
 flox/xarray.py       |  4 +++-
 tests/test_xarray.py | 15 +++++++++++++--
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/flox/core.py b/flox/core.py
index 022f29582..7e2fb9141 100644
--- a/flox/core.py
+++ b/flox/core.py
@@ -1559,7 +1559,10 @@ def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroupsOpt) ->
         return (None,) * nby
 
     if nby == 1 and not isinstance(expected_groups, tuple):
-        return (np.asarray(expected_groups),)
+        if isinstance(expected_groups, pd.Index):
+            return (expected_groups,)
+        else:
+            return (np.asarray(expected_groups),)
 
     if nby > 1 and not isinstance(expected_groups, tuple):  # TODO: test for list
         raise ValueError(
@@ -1734,9 +1737,11 @@ def groupby_reduce(
     # (pd.IntervalIndex or not)
     expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort)
 
+    is_binning = any(isbins) or any([isinstance(e, pd.IntervalIndex) for e in expected_groups])
+
     # TODO: could restrict this to dask-only
     factorize_early = (nby > 1) or (
-        any(isbins) and method == "cohorts" and is_duck_dask_array(array)
+        is_binning and method == "cohorts" and is_duck_dask_array(array)
     )
     if factorize_early:
         bys, final_groups, grp_shape = _factorize_multiple(
diff --git a/flox/xarray.py b/flox/xarray.py
index 1bd384875..e02065dab 100644
--- a/flox/xarray.py
+++ b/flox/xarray.py
@@ -313,7 +313,9 @@ def xarray_reduce(
     group_names: tuple[Any, ...] = ()
     group_sizes: dict[Any, int] = {}
     for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups, isbins)):
-        group_name = b_.name if not isbin_ else f"{b_.name}_bins"
+        group_name = (
+            f"{b_.name}_bins" if isbin_ or isinstance(expect, pd.IntervalIndex) else b_.name
+        )
         group_names += (group_name,)
 
         if isbin_ and isinstance(expect, int):
diff --git a/tests/test_xarray.py b/tests/test_xarray.py
index 50864a247..e17c7b98e 100644
--- a/tests/test_xarray.py
+++ b/tests/test_xarray.py
@@ -457,7 +457,8 @@ def test_datetime_array_reduce(use_cftime, func, engine):
 
 
 @requires_dask
-def test_groupby_bins_indexed_coordinate():
+@pytest.mark.parametrize("method", ["cohorts", "map-reduce"])
+def test_groupby_bins_indexed_coordinate(method):
     ds = (
         xr.tutorial.open_dataset("air_temperature")
         .isel(time=slice(100))
@@ -472,7 +473,17 @@ def test_groupby_bins_indexed_coordinate():
         expected_groups=([40, 50, 60, 70],),
         isbin=(True,),
         func="mean",
-        method="split-reduce",
+        method=method,
+    )
+    xr.testing.assert_allclose(expected, actual)
+
+    actual = xarray_reduce(
+        ds,
+        ds.lat,
+        dim=ds.air.dims,
+        expected_groups=pd.IntervalIndex.from_breaks([40, 50, 60, 70]),
+        func="mean",
+        method=method,
     )
     xr.testing.assert_allclose(expected, actual)
 

From cd4e2eb36c703b944c81121252546abdd2b008e9 Mon Sep 17 00:00:00 2001
From: Deepak Cherian <dcherian@users.noreply.github.com>
Date: Wed, 18 Jan 2023 13:35:38 -0700
Subject: [PATCH 2/3] Update flox/core.py

---
 flox/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flox/core.py b/flox/core.py
index 7e2fb9141..ca0cb922d 100644
--- a/flox/core.py
+++ b/flox/core.py
@@ -1737,7 +1737,7 @@ def groupby_reduce(
     # (pd.IntervalIndex or not)
     expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort)
 
-    is_binning = any(isbins) or any([isinstance(e, pd.IntervalIndex) for e in expected_groups])
+    is_binning = any([isinstance(e, pd.IntervalIndex) for e in expected_groups])
 
     # TODO: could restrict this to dask-only
     factorize_early = (nby > 1) or (

From 39267717f1f0388e0ab503b95bf3bba0b585a085 Mon Sep 17 00:00:00 2001
From: dcherian <deepak@cherian.net>
Date: Wed, 18 Jan 2023 20:48:43 -0700
Subject: [PATCH 3/3] More tests

---
 tests/test_core.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 0841e531a..1c208c211 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -644,10 +644,17 @@ def test_npg_nanarg_bug(func):
     assert_equal(actual, expected)
 
 
+@pytest.mark.parametrize(
+    "kwargs",
+    (
+        dict(expected_groups=np.array([1, 2, 4, 5]), isbin=True),
+        dict(expected_groups=pd.IntervalIndex.from_breaks([1, 2, 4, 5])),
+    ),
+)
 @pytest.mark.parametrize("method", ["cohorts", "map-reduce"])
 @pytest.mark.parametrize("chunk_labels", [False, True])
 @pytest.mark.parametrize("chunks", ((), (1,), (2,)))
-def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
+def test_groupby_bins(chunk_labels, kwargs, chunks, engine, method) -> None:
     array = [1, 1, 1, 1, 1, 1]
     labels = [0.2, 1.5, 1.9, 2, 3, 20]
 
@@ -663,14 +670,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
 
     with raise_if_dask_computes():
         actual, groups = groupby_reduce(
-            array,
-            labels,
-            func="count",
-            expected_groups=np.array([1, 2, 4, 5]),
-            isbin=True,
-            fill_value=0,
-            engine=engine,
-            method=method,
+            array, labels, func="count", fill_value=0, engine=engine, method=method, **kwargs
         )
     expected = np.array([3, 1, 0], dtype=np.intp)
     for left, right in zip(groups, pd.IntervalIndex.from_arrays([1, 2, 4], [2, 4, 5]).to_numpy()):