Skip to content

Commit

Permalink
REGR: groupby.value_counts with all NA values
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach committed Oct 7, 2024
1 parent 02267e5 commit b961ff5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,9 @@ def _ob_index_and_ids(
sorter = ob_index.argsort()
ob_index = ob_index.take(sorter)
_, index = np.unique(sorter, return_index=True)
ob_ids = np.where(ob_ids == -1, -1, index.take(ob_ids))
na_ids = ob_ids == -1
if not na_ids.all():
ob_ids = np.where(na_ids, -1, index.take(ob_ids))
ob_ids = ensure_platform_int(ob_ids)
return ob_index, ob_ids

Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/groupby/methods/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,3 +1219,25 @@ def test_value_counts_sort_categorical(sort, vc_sort, normalize):
expected = expected.take(taker)

tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("groupby_sort", [True, False])
def test_value_counts_all_na(sort, dropna, groupby_sort):
    """Check groupby.value_counts when the counted column holds only NA values.

    Regression test for GH#59989. The frame groups on ``a`` (values 2, 1, 1)
    while column ``b`` is NaN in every row; the result is compared against a
    hand-built Series for each combination of ``sort``, ``dropna`` and
    ``groupby_sort``.
    """
    # GH#59989
    frame = DataFrame({"a": [2, 1, 1], "b": np.nan})
    grouped = frame.groupby("a", sort=groupby_sort)
    result = grouped.value_counts(sort=sort, dropna=dropna)

    if dropna:
        # Every row has an NA in "b", so the expected result is empty.
        counts, a_codes, b_codes = [], [], []
    elif groupby_sort or sort:
        # Any sorting puts a=1 (count 2) ahead of a=2 (count 1).
        counts, a_codes, b_codes = [2, 1], [0, 1], [0, 0]
    else:
        # With no sorting at all the expectation follows the input order:
        # a=2 first, then a=1.
        counts, a_codes, b_codes = [1, 2], [1, 0], [0, 0]

    expected_index = MultiIndex(
        levels=[[1, 2], [np.nan]],
        codes=[a_codes, b_codes],
        names=["a", "b"],
    )
    expected = Series(counts, index=expected_index, dtype="int64", name="count")

    tm.assert_series_equal(result, expected)

0 comments on commit b961ff5

Please sign in to comment.