From 9587203416a23f8774d0349714b4b72a3c082343 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Mar 2024 08:09:48 -0600 Subject: [PATCH 1/3] Fix nanlen with strings Closes https://github.com/pydata/xarray/issues/8853 --- flox/aggregate_npg.py | 2 ++ flox/aggregate_numbagg.py | 15 ++++++++++++++- tests/test_core.py | 9 +++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/flox/aggregate_npg.py b/flox/aggregate_npg.py index 6ffbc0b0..91d49cb7 100644 --- a/flox/aggregate_npg.py +++ b/flox/aggregate_npg.py @@ -88,6 +88,8 @@ def nanprod(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dt def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None): + if array.dtype.kind in "US": + array = np.broadcast_to(np.array([1]), array.shape) result = _get_aggregate(engine).aggregate( group_idx, array, diff --git a/flox/aggregate_numbagg.py b/flox/aggregate_numbagg.py index 1c0edbee..c2b718e8 100644 --- a/flox/aggregate_numbagg.py +++ b/flox/aggregate_numbagg.py @@ -105,11 +105,24 @@ def nanstd(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None, ) +def nanlen(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): + if array.dtype.kind in "US": + array = np.broadcast_to(np.array([1]), array.shape) + return _numbagg_wrapper( + group_idx, + array, + axis=axis, + size=size, + func="nancount", + # fill_value=fill_value, + # dtype=dtype, + ) + + nansum = partial(_numbagg_wrapper, func="nansum") nanmean = partial(_numbagg_wrapper, func="nanmean") nanprod = partial(_numbagg_wrapper, func="nanprod") nansum_of_squares = partial(_numbagg_wrapper, func="nansum_of_squares") -nanlen = partial(_numbagg_wrapper, func="nancount") nanprod = partial(_numbagg_wrapper, func="nanprod") nanfirst = partial(_numbagg_wrapper, func="nanfirst") nanlast = partial(_numbagg_wrapper, func="nanlast") diff --git a/tests/test_core.py b/tests/test_core.py index 26c75a85..21d6667e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1688,3 +1688,12 @@ def test_multiple_quantiles(q, chunk, func, by_ndim): if by_ndim == 2: expected = expected.squeeze(axis=-2) assert_equal(expected, actual, tolerance=1e-14) + + +@pytest.mark.parametrize("dtype", ["U3", "S3"]) +def test_nanlen_string(dtype, engine): + array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype) + by = np.array([0, 0, 1, 2, 1, 0]) + expected = np.array([3, 2, 1]) + actual, *_ = groupby_reduce(array, by, func="count", engine=engine) + assert_equal(expected, actual) From be56b88579a41d8c52d7713dd6690d24ee148e29 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Mar 2024 08:53:55 -0600 Subject: [PATCH 2/3] fix windows --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 21d6667e..2d5dfc41 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1694,6 +1694,6 @@ def test_multiple_quantiles(q, chunk, func, by_ndim): def test_nanlen_string(dtype, engine): array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype) by = np.array([0, 0, 1, 2, 1, 0]) - expected = np.array([3, 2, 1]) + expected = np.array([3, 2, 1], dtype=np.intp) actual, *_ = groupby_reduce(array, by, func="count", engine=engine) assert_equal(expected, actual) From 2764c94015f277fe8e393303144e74c513ecb1f6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Mar 2024 08:55:51 -0600 Subject: [PATCH 3/3] Silence warnings --- flox/aggregate_flox.py | 6 ++++-- tests/test_core.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py index ef64a371..4bd9c24a 100644 --- a/flox/aggregate_flox.py +++ b/flox/aggregate_flox.py @@ -37,7 +37,8 @@ def _lerp(a, b, *, t, dtype, out=None): """ if out is None: out = np.empty_like(a, dtype=dtype) - diff_b_a = np.subtract(b, a) + with np.errstate(invalid="ignore"): + diff_b_a = np.subtract(b, a) # asanyarray is a stop-gap until gh-13105 np.add(a, diff_b_a * t, out=out) np.subtract(b, diff_b_a * (1 - t), out=out, where=t >= 0.5) @@ -95,7 +96,8 @@ def quantile_(array, inv_idx, *, q, axis, skipna, group_idx, dtype=None, out=Non # partition the complex array in-place labels_broadcast = np.broadcast_to(group_idx, array.shape) - cmplx = labels_broadcast + 1j * array + with np.errstate(invalid="ignore"): + cmplx = labels_broadcast + 1j * array cmplx.partition(kth=kth, axis=-1) if is_scalar_q: a_ = cmplx.imag diff --git a/tests/test_core.py b/tests/test_core.py index 2d5dfc41..19c96758 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1127,7 +1127,7 @@ def test_group_by_datetime(engine, method): edges = pd.date_range("1999-12-31", "2000-12-31", freq="ME").to_series().to_numpy() actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs) - expected = data.resample("M").mean().to_numpy() + expected = data.resample("ME").mean().to_numpy() assert_equal(expected, actual) actual, _ = groupby_reduce(