From 8bc6e082d2d674134b3fd7d6c114448744202e99 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 24 Dec 2019 20:08:57 +0800 Subject: [PATCH 1/9] TST: added test on drop with non unique datetime index + invalid keys (GH30399) --- pandas/tests/indexes/multi/test_drop.py | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 2c24c5bd57085..476f695147f4b 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -139,3 +139,29 @@ def test_drop_not_lexsorted(): tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) with tm.assert_produces_warning(PerformanceWarning): tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) + + +def test_drop_with_non_unique_datetime_index_and_invalid_keys(): + # GH 30399 + + # define dataframe with unique datetime index + df_unique = pd.DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'], + index=pd.date_range("2012", freq='H', periods=5)) + # create dataframe with non-unique datetime index + df_nonunique = df_unique.copy().iloc[[0, 2, 2, 3]] + + try: + df_nonunique.drop(['a', 'b']) # Dropping with labels not exist in the index + except Exception as e: + result = e + else: + result = "df_nonunique.drop(['a', 'b']) should raise error but it didn't" + + try: + df_unique.drop(['a', 'b']) # Dropping with labels not exist in the index + except Exception as e: + expected = e + else: + expected = "df_unique.drop(['a', 'b']) should raise error but it didn't" + + assert type(result) is type(expected) and result.args == expected.args From 2d747a0c4a7852a2b75efba14875c03971a89b21 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 24 Dec 2019 20:09:38 +0800 Subject: [PATCH 2/9] BUG: fixed bug in drop with non-unique datetime index and invalid keys (GH30399) --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7a238daeca9..07cf20da96952 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4551,7 +4551,7 @@ def get_indexer_non_unique(self, target): if is_categorical(target): tgt_values = np.asarray(target) - elif self.is_all_dates: + elif target.is_all_dates: # GH 30399 tgt_values = target.asi8 else: tgt_values = target._ndarray_values From 0b3600bc3171d406cf8077b4e2e17568416de1b1 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 24 Dec 2019 20:10:01 +0800 Subject: [PATCH 3/9] DOC: Add in what's new the fix in drop with non-unique datetime index (GH30399) --- doc/source/whatsnew/v1.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1253788d7ff27..e9a7057b07142 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -714,6 +714,8 @@ Datetimelike - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Bug in :meth:`pandas.core.indexes.base.Index.get_indexer_non_unique` incorrectly condition on self.is_all_date rather than target.is_all_date when converting target labels to asi8, which results in wrong error message when dropping with non-unique datetime index (:issue:`30399`) + Timedelta ^^^^^^^^^ From d9f45e0a9d011ba9d2c3c4573937e8ceb143012a Mon Sep 17 00:00:00 2001 From: Jiaxiang Date: Tue, 24 Dec 2019 21:41:24 +0800 Subject: [PATCH 4/9] TST: updated code formatting using black --- pandas/tests/indexes/multi/test_drop.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 476f695147f4b..20dbc4185812a 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -145,20 +145,23 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): # GH 30399 # define dataframe with unique datetime index - df_unique = pd.DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'], - index=pd.date_range("2012", freq='H', periods=5)) + df_unique = pd.DataFrame( + np.random.randn(5, 3), + columns=["a", "b", "c"], + index=pd.date_range("2012", freq="H", periods=5), + ) # create dataframe with non-unique datetime index df_nonunique = df_unique.copy().iloc[[0, 2, 2, 3]] try: - df_nonunique.drop(['a', 'b']) # Dropping with labels not exist in the index + df_nonunique.drop(["a", "b"]) # Dropping with labels not exist in the index except Exception as e: result = e else: result = "df_nonunique.drop(['a', 'b']) should raise error but it didn't" try: - df_unique.drop(['a', 'b']) # Dropping with labels not exist in the index + df_unique.drop(["a", "b"]) # Dropping with labels not exist in the index except Exception as e: expected = e else: From 2c6638d241d87e9355a3ecbe87978b40e46bbec8 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 24 Dec 2019 22:04:30 +0800 Subject: [PATCH 5/9] BUG: restored a check condition that is previously incorrectly dropped (GH30399) --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 07cf20da96952..272e97481a723 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4551,7 +4551,7 @@ def get_indexer_non_unique(self, target): if is_categorical(target): tgt_values = np.asarray(target) - elif target.is_all_dates: # GH 30399 + elif self.is_all_dates and target.is_all_dates: # GH 30399 tgt_values = target.asi8 else: tgt_values = target._ndarray_values From 963caa3684625422ce61471be8a7142c9971eeb0 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 24 Dec 2019 22:08:26 +0800 Subject: [PATCH 6/9] DOC: updated what's new for bug fix (GH30399) --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e9a7057b07142..e7a742cfd2713 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -714,7 +714,7 @@ Datetimelike - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) -- Bug in :meth:`pandas.core.indexes.base.Index.get_indexer_non_unique` incorrectly condition on self.is_all_date rather than target.is_all_date when converting target labels to asi8, which results in wrong error message when dropping with non-unique datetime index (:issue:`30399`) +- Bug in :meth:`pandas.core.indexes.base.Index.get_indexer_non_unique` missing condition on target.is_all_date before trying to convert target to asi8 values, which results in wrong error message when dropping with non-unique datetime index (:issue:`30399`) Timedelta From 90ea50cc92cbc8fa057a3114bfbebf4193f34e2d Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 00:41:02 +0800 Subject: [PATCH 7/9] TST: updated test to check specifically for KeyError (GH30399) --- pandas/tests/indexes/multi/test_drop.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 20dbc4185812a..76194a9853273 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -145,26 +145,13 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): # GH 30399 # define dataframe with unique datetime index - df_unique = pd.DataFrame( + df = pd.DataFrame( np.random.randn(5, 3), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), ) # create dataframe with non-unique datetime index - df_nonunique = df_unique.copy().iloc[[0, 2, 2, 3]] - - try: - df_nonunique.drop(["a", "b"]) # Dropping with labels not exist in the index - except Exception as e: - result = e - else: - result = "df_nonunique.drop(['a', 'b']) should raise error but it didn't" - - try: - df_unique.drop(["a", "b"]) # Dropping with labels not exist in the index - except Exception as e: - expected = e - else: - expected = "df_unique.drop(['a', 'b']) should raise error but it didn't" - - assert type(result) is type(expected) and result.args == expected.args + df = df.copy().iloc[[0, 2, 2, 3]] + + with pytest.raises(KeyError, match="not found in axis"): + df.drop(["a", "b"]) # Dropping with labels not exist in the index From ba254987aa8d7045f2cf7ad44ce5d38b9555a864 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 00:45:51 +0800 Subject: [PATCH 8/9] TST: refactored test to make it cleaner (GH30399) --- pandas/tests/indexes/multi/test_drop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 76194a9853273..ee60f4537ade3 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -151,7 +151,7 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): index=pd.date_range("2012", freq="H", periods=5), ) # create dataframe with non-unique datetime index - df = df.copy().iloc[[0, 2, 2, 3]] + df = df.iloc[[0, 2, 2, 3]].copy() with pytest.raises(KeyError, match="not found in axis"): df.drop(["a", "b"]) # Dropping with labels not exist in the index From a6675e42a9755614995979b0b0b6d941d507aecc Mon Sep 17 00:00:00 2001 From: Jiaxiang Date: Wed, 25 Dec 2019 08:02:29 +0800 Subject: [PATCH 9/9] DOC: Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: William Ayd --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e7a742cfd2713..388bedad45db6 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -714,7 +714,7 @@ Datetimelike - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) -- Bug in :meth:`pandas.core.indexes.base.Index.get_indexer_non_unique` missing condition on target.is_all_date before trying to convert target to asi8 values, which results in wrong error message when dropping with non-unique datetime index (:issue:`30399`) +- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) Timedelta