From b384c51f0ccab7f2a90c32e5d574c72548353caa Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 3 Oct 2020 17:14:05 +0200 Subject: [PATCH 01/14] Fix dropped nas with one group column an dropna=False --- pandas/_libs/lib.pyx | 5 ++++- pandas/core/groupby/ops.py | 9 +++------ pandas/core/sorting.py | 3 ++- pandas/tests/groupby/test_groupby.py | 25 +++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 61a9634b00211..176c4832cfa03 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -902,7 +902,8 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys, val = keys[j][sorted_labels[j][i - 1]] PyTuple_SET_ITEM(tup, j, val) Py_INCREF(val) - + if len(tup) == 1: + tup = tup[0] result[tup] = index[start:i] start = i cur = lab @@ -912,6 +913,8 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys, val = keys[j][sorted_labels[j][n - 1]] PyTuple_SET_ITEM(tup, j, val) Py_INCREF(val) + if len(tup) == 1: + tup = tup[0] result[tup] = index[start:] return result diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6051aa3022da1..e544f0892c201 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -217,12 +217,9 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): @cache_readonly def indices(self): """ dict {group name -> group indices} """ - if len(self.groupings) == 1: - return self.groupings[0].indices - else: - codes_list = [ping.codes for ping in self.groupings] - keys = [ping.group_index for ping in self.groupings] - return get_indexer_dict(codes_list, keys) + codes_list = [ping.codes for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] + return get_indexer_dict(codes_list, keys) @property def codes(self) -> List[np.ndarray]: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e02b565ed5d7b..5808d347b920a 100644 --- a/pandas/core/sorting.py +++ 
b/pandas/core/sorting.py @@ -537,7 +537,8 @@ def get_indexer_dict(label_list, keys): sorted_labels = [lab.take(sorter) for lab in label_list] group_index = group_index.take(sorter) - + if np.all(group_index == -1): + return {} return lib.indices_fast(sorter, group_index, keys, sorted_labels) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6783fc5b66433..d938b54c2004d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1297,6 +1297,31 @@ def test_groupby_nat_exclude(): grouped.get_group(pd.NaT) +def test_groupby_two_group_keys_all_nan(): + # Grouping over two group keys all nan raised an error previously + df = pd.DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) + result = df.groupby(["a", "b"]).indices + assert result == {} + + +def test_groupby_nan_included(): + # GH 35646, GH 35542 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + grouped = df.groupby("group", dropna=False) + result = grouped.indices + expected = {"g1": np.array([0, 2]), "g2": np.array([3]), np.nan: np.array([1, 4])} + for result_values, expected_values in zip(result.values(), expected.values()): + tm.assert_numpy_array_equal(result_values, expected_values) + assert np.isnan(list(result.keys())[2]) + + result = grouped.mean() + expected = pd.DataFrame( + {"B": [1.0, 3.0, 2.5]}, index=pd.Index(["g1", "g2", np.nan], name="group") + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_2d_malformed(): d = DataFrame(index=range(2)) d["group"] = ["g1", "g2"] From 08c308886c884869f6bca16779e4c4f66e2690da Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 4 Oct 2020 01:31:35 +0200 Subject: [PATCH 02/14] Add whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/sorting.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 
cb0858fd678f8..03a52c8cccc21 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -410,6 +410,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`) - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) +- Bug in :meth:`Groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5808d347b920a..9c71d989d8518 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -527,6 +527,8 @@ def get_indexer_dict(label_list, keys): shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) + if np.all(group_index == -1): + return {} ngroups = ( ((group_index.size and group_index.max()) + 1) if is_int64_overflow_possible(shape) @@ -537,8 +539,7 @@ def get_indexer_dict(label_list, keys): sorted_labels = [lab.take(sorter) for lab in label_list] group_index = group_index.take(sorter) - if np.all(group_index == -1): - return {} + return lib.indices_fast(sorter, group_index, keys, sorted_labels) From 47649aa986ddbb144c0d54a4ef4389119f0d8f33 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 4 Oct 2020 15:05:43 +0200 Subject: [PATCH 03/14] Fix failing test --- pandas/tests/groupby/test_groupby.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d938b54c2004d..25942a282fce4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ 
b/pandas/tests/groupby/test_groupby.py @@ -1310,7 +1310,12 @@ def test_groupby_nan_included(): df = pd.DataFrame(data) grouped = df.groupby("group", dropna=False) result = grouped.indices - expected = {"g1": np.array([0, 2]), "g2": np.array([3]), np.nan: np.array([1, 4])} + dtype = "int64" + expected = { + "g1": np.array([0, 2], dtype=dtype), + "g2": np.array([3], dtype=dtype), + np.nan: np.array([1, 4], dtype=dtype), + } for result_values, expected_values in zip(result.values(), expected.values()): tm.assert_numpy_array_equal(result_values, expected_values) assert np.isnan(list(result.keys())[2]) From c93590b1bd6b8869433a2bd19dbdb48840339983 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 6 Oct 2020 20:29:07 +0200 Subject: [PATCH 04/14] Adress review --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/groupby/test_groupby.py | 25 +------------------- pandas/tests/groupby/test_groupby_dropna.py | 26 ++++++++++++++++++++- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 03a52c8cccc21..5dd7d7a3b3ae7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -410,7 +410,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`) - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) -- Bug in :meth:`Groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) +- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column 
(:issue:`35646`, :issue:`35542`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 25942a282fce4..b0b74eba6a3df 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1298,35 +1298,12 @@ def test_groupby_nat_exclude(): def test_groupby_two_group_keys_all_nan(): - # Grouping over two group keys all nan raised an error previously + # GH #36842: Grouping over two group keys shouldn't raise an error df = pd.DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) result = df.groupby(["a", "b"]).indices assert result == {} -def test_groupby_nan_included(): - # GH 35646, GH 35542 - data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} - df = pd.DataFrame(data) - grouped = df.groupby("group", dropna=False) - result = grouped.indices - dtype = "int64" - expected = { - "g1": np.array([0, 2], dtype=dtype), - "g2": np.array([3], dtype=dtype), - np.nan: np.array([1, 4], dtype=dtype), - } - for result_values, expected_values in zip(result.values(), expected.values()): - tm.assert_numpy_array_equal(result_values, expected_values) - assert np.isnan(list(result.keys())[2]) - - result = grouped.mean() - expected = pd.DataFrame( - {"B": [1.0, 3.0, 2.5]}, index=pd.Index(["g1", "g2", np.nan], name="group") - ) - tm.assert_frame_equal(result, expected) - - def test_groupby_2d_malformed(): d = DataFrame(index=range(2)) d["group"] = ["g1", "g2"] diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index cd6c17955c18d..1907365cda84d 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -import pandas.testing as tm +import pandas._testing as tm @pytest.mark.parametrize( @@ -336,3 +336,27 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, expected = 
pd.DataFrame(selected_data, index=mi) tm.assert_frame_equal(result, expected) + + +def test_groupby_nan_included(): + # GH 35646, GH 35542 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + grouped = df.groupby("group", dropna=False) + result = grouped.indices + dtype = "int64" + expected = { + "g1": np.array([0, 2], dtype=dtype), + "g2": np.array([3], dtype=dtype), + np.nan: np.array([1, 4], dtype=dtype), + } + for result_values, expected_values in zip(result.values(), expected.values()): + tm.assert_numpy_array_equal(result_values, expected_values) + assert np.isnan(list(result.keys())[2]) + assert list(result.keys())[0:2] == ["g1", "g2"] + + result = grouped.mean() + expected = pd.DataFrame( + {"B": [1.0, 3.0, 2.5]}, index=pd.Index(["g1", "g2", np.nan], name="group") + ) + tm.assert_frame_equal(result, expected) From c61ce7a4bf868387863a6dae5d7fa69346d4de90 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 7 Oct 2020 01:32:36 +0200 Subject: [PATCH 05/14] Address review comments --- pandas/_libs/lib.pyx | 32 ++++++++++++++++++-------------- pandas/core/sorting.py | 11 ++++++++--- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 176c4832cfa03..c412adc3cd9c2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -897,24 +897,28 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys, if lab != cur: if lab != -1: - tup = PyTuple_New(k) - for j in range(k): - val = keys[j][sorted_labels[j][i - 1]] - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) - if len(tup) == 1: - tup = tup[0] + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][i - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][i - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) result[tup] = index[start:i] start = i cur = lab - tup = PyTuple_New(k) - for j in range(k): - val = 
keys[j][sorted_labels[j][n - 1]] - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) - if len(tup) == 1: - tup = tup[0] + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][n - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][n - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) result[tup] = index[start:] return result diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 9c71d989d8518..e87449c77e818 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -4,6 +4,7 @@ TYPE_CHECKING, Callable, DefaultDict, + Dict, Iterable, List, Optional, @@ -22,7 +23,7 @@ ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms @@ -517,17 +518,21 @@ def get_flattened_list( return [tuple(array) for array in arrays.values()] -def get_indexer_dict(label_list, keys): +def get_indexer_dict( + label_list: List[ABCIndex], keys: List[np.ndarray] +) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns ------- - dict + dict: Labels mapped to indexers. 
""" shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) if np.all(group_index == -1): + # When all keys are nan and dropna=True, indices_fast can't handle this + # and the return is empty anyway return {} ngroups = ( ((group_index.size and group_index.max()) + 1) From dfa522afd9e2e4dcf24da9a04bb2197b99005aab Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 7 Oct 2020 10:08:11 +0200 Subject: [PATCH 06/14] Adress review comments --- pandas/core/sorting.py | 6 +++--- pandas/tests/groupby/test_groupby_dropna.py | 8 +------- pandas/tests/window/test_rolling.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e87449c77e818..e241e53d154ce 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -16,14 +16,14 @@ from pandas._libs import algos, hashtable, lib from pandas._libs.hashtable import unique_label_indices -from pandas._typing import IndexKeyFunc +from pandas._typing import IndexKeyFunc, Label from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex +from pandas.core.dtypes.generic import ABCMultiIndex from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms @@ -519,7 +519,7 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[ABCIndex], keys: List[np.ndarray] + label_list: List[np.ndarray], keys: Label ) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 1907365cda84d..cf13e0518038b 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -339,7 +339,7 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, def test_groupby_nan_included(): - # GH 35646, GH 35542 + # GH 35646 
data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} df = pd.DataFrame(data) grouped = df.groupby("group", dropna=False) @@ -354,9 +354,3 @@ def test_groupby_nan_included(): tm.assert_numpy_array_equal(result_values, expected_values) assert np.isnan(list(result.keys())[2]) assert list(result.keys())[0:2] == ["g1", "g2"] - - result = grouped.mean() - expected = pd.DataFrame( - {"B": [1.0, 3.0, 2.5]}, index=pd.Index(["g1", "g2", np.nan], name="group") - ) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index eaee276c7a388..3cafc9c6650ce 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -868,3 +868,18 @@ def test_rolling_period_index(index, window, func, values): result = getattr(ds.rolling(window, closed="left"), func)() expected = pd.Series(values, index=index) tm.assert_series_equal(result, expected) + + +def test_groupby_rolling_nan_included(): + # GH 35542 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean() + expected = pd.DataFrame( + {"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, + index=pd.MultiIndex.from_tuples( + [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)], + names=["group", None], + ), + ) + tm.assert_frame_equal(result, expected) From 2e3e1bf1cb8fdbdf84cd078157d75ef363669694 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 8 Oct 2020 00:18:04 +0200 Subject: [PATCH 07/14] Change type hint --- pandas/core/sorting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e241e53d154ce..2f453c8b775f6 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -5,6 +5,7 @@ Callable, DefaultDict, Dict, + Hashable, Iterable, List, Optional, @@ -16,7 +17,7 @@ from pandas._libs import algos, hashtable, lib from 
pandas._libs.hashtable import unique_label_indices -from pandas._typing import IndexKeyFunc, Label +from pandas._typing import IndexKeyFunc from pandas.core.dtypes.common import ( ensure_int64, @@ -519,7 +520,7 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[np.ndarray], keys: Label + label_list: List[np.ndarray], keys: Hashable ) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns From d0672800a463b87afa016417893c194a0d4c5fba Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 8 Oct 2020 01:14:44 +0200 Subject: [PATCH 08/14] Change type annotation --- pandas/core/sorting.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2f453c8b775f6..395a28f5c3ab8 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -5,7 +5,6 @@ Callable, DefaultDict, Dict, - Hashable, Iterable, List, Optional, @@ -17,7 +16,7 @@ from pandas._libs import algos, hashtable, lib from pandas._libs.hashtable import unique_label_indices -from pandas._typing import IndexKeyFunc +from pandas._typing import IndexKeyFunc, Label from pandas.core.dtypes.common import ( ensure_int64, @@ -520,7 +519,7 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[np.ndarray], keys: Hashable + label_list: List[np.ndarray], keys: List[Label] ) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns From 5b5b6739938b365d6e9784a68fe7db5d79ab26be Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 9 Oct 2020 22:01:28 +0200 Subject: [PATCH 09/14] Fix type hint to index --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 395a28f5c3ab8..843b92fe2a14c 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -519,7 +519,7 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[np.ndarray], keys: List[Label] + label_list: List[np.ndarray], keys: List[Index] ) -> Dict[Union[str, Tuple], np.ndarray]: """ 
Returns From b0a0372412fc68c8a7b3e3bc9a0e774b16c6ee53 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 9 Oct 2020 22:09:18 +0200 Subject: [PATCH 10/14] Fix type hint because Index can not be imported --- pandas/core/sorting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 843b92fe2a14c..9e1e9482fd061 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -16,14 +16,14 @@ from pandas._libs import algos, hashtable, lib from pandas._libs.hashtable import unique_label_indices -from pandas._typing import IndexKeyFunc, Label +from pandas._typing import IndexKeyFunc from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.dtypes.generic import ABCMultiIndex, ABCIndex from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms @@ -519,7 +519,7 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[np.ndarray], keys: List[Index] + label_list: List[np.ndarray], keys: List[ABCIndex] ) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns From 31005114be7fd3c51aac5de1cfb73bf03664885e Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 9 Oct 2020 22:16:54 +0200 Subject: [PATCH 11/14] Change import order --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 9e1e9482fd061..f95d964e07374 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -23,7 +23,7 @@ ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCMultiIndex, ABCIndex +from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms From 6ca432489be1456ace131cd7ea1038ca7984ef72 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 10 Oct 2020 19:50:04 +0200 
Subject: [PATCH 12/14] Fix type hints --- pandas/core/sorting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index f95d964e07374..ea80628386b5a 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -23,7 +23,7 @@ ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex +from pandas.core.dtypes.generic import ABCMultiIndex from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms @@ -519,7 +519,7 @@ def get_flattened_list( def get_indexer_dict( - label_list: List[np.ndarray], keys: List[ABCIndex] + label_list: List[np.ndarray], keys: List["Index"] ) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns From 528fe0d231770fce13536edd0a04310c7688d98d Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 30 Oct 2020 01:07:51 +0100 Subject: [PATCH 13/14] Fix pattern --- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/window/test_rolling.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 395feb0563083..f426323846787 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1300,7 +1300,7 @@ def test_groupby_nat_exclude(): def test_groupby_two_group_keys_all_nan(): # GH #36842: Grouping over two group keys shouldn't raise an error - df = pd.DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) + df = DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) result = df.groupby(["a", "b"]).indices assert result == {} diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index f744622449a63..02bcfab8d3388 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1092,9 +1092,9 @@ def test_rolling_corr_timedelta_index(index, window): def 
test_groupby_rolling_nan_included(): # GH 35542 data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} - df = pd.DataFrame(data) + df = DataFrame(data) result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean() - expected = pd.DataFrame( + expected = DataFrame( {"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, index=pd.MultiIndex.from_tuples( [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)], From f5b25cb60813c67d81efc634c6a35a795d1244e6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 31 Oct 2020 22:31:53 +0100 Subject: [PATCH 14/14] Add lost whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0155181c46bfe..1cb8710799d30 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -517,7 +517,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.rolling` returned wrong values with timeaware window containing ``NaN``. 
Raises ``ValueError`` because windows are not monotonic now (:issue:`34617`) - Bug in :meth:`Rolling.__iter__` where a ``ValueError`` was not raised when ``min_periods`` was larger than ``window`` (:issue:`37156`) - Using :meth:`Rolling.var()` instead of :meth:`Rolling.std()` avoids numerical issues for :meth:`Rolling.corr()` when :meth:`Rolling.var()` is still within floating point precision while :meth:`Rolling.std()` is not (:issue:`31286`) -- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) +- Bug in :meth:`df.groupby(..).quantile() <pandas.core.groupby.DataFrameGroupBy.quantile>` and :meth:`df.resample(..).quantile() <pandas.core.resample.Resampler.quantile>` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`) - Bug in :meth:`Rolling.median` and :meth:`Rolling.quantile` returned wrong values for :class:`BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`) - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)