From 9587203416a23f8774d0349714b4b72a3c082343 Mon Sep 17 00:00:00 2001
From: Deepak Cherian <deepak@cherian.net>
Date: Tue, 19 Mar 2024 08:09:48 -0600
Subject: [PATCH 1/3] Fix nanlen with strings

Closes https://github.com/pydata/xarray/issues/8853
---
 flox/aggregate_npg.py     |  2 ++
 flox/aggregate_numbagg.py | 15 ++++++++++++++-
 tests/test_core.py        |  9 +++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/flox/aggregate_npg.py b/flox/aggregate_npg.py
index 6ffbc0b0..91d49cb7 100644
--- a/flox/aggregate_npg.py
+++ b/flox/aggregate_npg.py
@@ -88,6 +88,8 @@ def nanprod(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dt
 
 
 def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None):
+    if array.dtype.kind in "US":
+        array = np.broadcast_to(np.array([1]), array.shape)
     result = _get_aggregate(engine).aggregate(
         group_idx,
         array,
diff --git a/flox/aggregate_numbagg.py b/flox/aggregate_numbagg.py
index 1c0edbee..c2b718e8 100644
--- a/flox/aggregate_numbagg.py
+++ b/flox/aggregate_numbagg.py
@@ -105,11 +105,24 @@ def nanstd(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None,
     )
 
 
+def nanlen(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
+    if array.dtype.kind in "US":  # NumPy unicode ("U") / byte-string ("S") dtypes
+        array = np.broadcast_to(np.array([1]), array.shape)  # same-shape array of ones stands in for the strings
+    return _numbagg_wrapper(
+        group_idx,
+        array,
+        axis=axis,
+        size=size,
+        func="nancount",
+        # NOTE: fill_value and dtype are accepted by this function but are
+        # not forwarded to _numbagg_wrapper.
+    )
+
+
 nansum = partial(_numbagg_wrapper, func="nansum")
 nanmean = partial(_numbagg_wrapper, func="nanmean")
 nanprod = partial(_numbagg_wrapper, func="nanprod")
 nansum_of_squares = partial(_numbagg_wrapper, func="nansum_of_squares")
-nanlen = partial(_numbagg_wrapper, func="nancount")
 nanprod = partial(_numbagg_wrapper, func="nanprod")
 nanfirst = partial(_numbagg_wrapper, func="nanfirst")
 nanlast = partial(_numbagg_wrapper, func="nanlast")
diff --git a/tests/test_core.py b/tests/test_core.py
index 26c75a85..21d6667e 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1688,3 +1688,12 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
     if by_ndim == 2:
         expected = expected.squeeze(axis=-2)
     assert_equal(expected, actual, tolerance=1e-14)
+
+
+@pytest.mark.parametrize("dtype", ["U3", "S3"])
+def test_nanlen_string(dtype, engine):
+    array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
+    by = np.array([0, 0, 1, 2, 1, 0])
+    expected = np.array([3, 2, 1])
+    actual, *_ = groupby_reduce(array, by, func="count", engine=engine)
+    assert_equal(expected, actual)

From be56b88579a41d8c52d7713dd6690d24ee148e29 Mon Sep 17 00:00:00 2001
From: Deepak Cherian <deepak@cherian.net>
Date: Tue, 19 Mar 2024 08:53:55 -0600
Subject: [PATCH 2/3] Fix expected dtype in test_nanlen_string on Windows

---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 21d6667e..2d5dfc41 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1694,6 +1694,6 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
 def test_nanlen_string(dtype, engine):
     array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
     by = np.array([0, 0, 1, 2, 1, 0])
-    expected = np.array([3, 2, 1])
+    expected = np.array([3, 2, 1], dtype=np.intp)
     actual, *_ = groupby_reduce(array, by, func="count", engine=engine)
     assert_equal(expected, actual)

From 2764c94015f277fe8e393303144e74c513ecb1f6 Mon Sep 17 00:00:00 2001
From: Deepak Cherian <deepak@cherian.net>
Date: Tue, 19 Mar 2024 08:55:51 -0600
Subject: [PATCH 3/3] Silence warnings

---
 flox/aggregate_flox.py | 6 ++++--
 tests/test_core.py     | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index ef64a371..4bd9c24a 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -37,7 +37,8 @@ def _lerp(a, b, *, t, dtype, out=None):
     """
     if out is None:
         out = np.empty_like(a, dtype=dtype)
-    diff_b_a = np.subtract(b, a)
+    with np.errstate(invalid="ignore"):
+        diff_b_a = np.subtract(b, a)
     # asanyarray is a stop-gap until gh-13105
     np.add(a, diff_b_a * t, out=out)
     np.subtract(b, diff_b_a * (1 - t), out=out, where=t >= 0.5)
@@ -95,7 +96,8 @@ def quantile_(array, inv_idx, *, q, axis, skipna, group_idx, dtype=None, out=Non
 
     # partition the complex array in-place
     labels_broadcast = np.broadcast_to(group_idx, array.shape)
-    cmplx = labels_broadcast + 1j * array
+    with np.errstate(invalid="ignore"):
+        cmplx = labels_broadcast + 1j * array
     cmplx.partition(kth=kth, axis=-1)
     if is_scalar_q:
         a_ = cmplx.imag
diff --git a/tests/test_core.py b/tests/test_core.py
index 2d5dfc41..19c96758 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1127,7 +1127,7 @@ def test_group_by_datetime(engine, method):
 
     edges = pd.date_range("1999-12-31", "2000-12-31", freq="ME").to_series().to_numpy()
     actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs)
-    expected = data.resample("M").mean().to_numpy()
+    expected = data.resample("ME").mean().to_numpy()
     assert_equal(expected, actual)
 
     actual, _ = groupby_reduce(