From a7a6fd40016bd3fc519337e77d39b26a8cdbf09b Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Tue, 5 Jan 2021 03:27:20 +0100 Subject: [PATCH] BUG: loc returning wrong elements for non-monotonic DatetimeIndex (#38010) --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexes/datetimes.py | 72 ++++++++++++++++--------------- pandas/tests/indexing/test_loc.py | 41 +++++++++++++++++- 3 files changed, 77 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 151c54b664a3a0..ac3b5dcaf53ae8 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -243,6 +243,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) +- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f82ee27aef5342..932868451058f1 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -776,42 +776,44 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): if isinstance(end, date) and not isinstance(end, datetime): end = datetime.combine(end, time(0, 0)) - try: + def check_str_or_none(point): + return point is not None and not isinstance(point, str) + + # GH#33146 if start and end are combinations of str and None and Index is not + # monotonic, we can not use Index.slice_indexer because it does not honor the + # actual elements, is only searching for start and end + if ( + check_str_or_none(start) + or check_str_or_none(end) + or self.is_monotonic_increasing + ): return Index.slice_indexer(self, start, end, step, kind=kind) - except KeyError: - # For historical reasons DatetimeIndex by default supports - # value-based partial (aka string) slices on non-monotonic arrays, - # let's try that. - if (start is None or isinstance(start, str)) and ( - end is None or isinstance(end, str) - ): - mask = np.array(True) - deprecation_mask = np.array(True) - if start is not None: - start_casted = self._maybe_cast_slice_bound(start, "left", kind) - mask = start_casted <= self - deprecation_mask = start_casted == self - - if end is not None: - end_casted = self._maybe_cast_slice_bound(end, "right", kind) - mask = (self <= end_casted) & mask - deprecation_mask = (end_casted == self) | deprecation_mask - - if not deprecation_mask.any(): - warnings.warn( - "Value based partial slicing on non-monotonic DatetimeIndexes " - "with non-existing keys is deprecated and will raise a " - "KeyError in a future Version.", - FutureWarning, - stacklevel=5, - ) - indexer = mask.nonzero()[0][::step] - if len(indexer) == len(self): - return slice(None) - else: - return indexer - else: - raise + + mask = np.array(True) + deprecation_mask = np.array(True) + if start is not None: + start_casted = self._maybe_cast_slice_bound(start, "left", kind) + mask = start_casted <= self + deprecation_mask = start_casted == self + + if end is not None: + end_casted = self._maybe_cast_slice_bound(end, "right", kind) + mask = (self <= end_casted) & mask + deprecation_mask = (end_casted == self) | deprecation_mask + + if not deprecation_mask.any(): + warnings.warn( + "Value based partial slicing on non-monotonic DatetimeIndexes " + "with non-existing keys is deprecated and will raise a " + "KeyError in a future Version.", + FutureWarning, + stacklevel=5, + ) + indexer = mask.nonzero()[0][::step] + if len(indexer) == len(self): + return slice(None) + else: + return indexer # -------------------------------------------------------------------- diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6c5cd0f335faa6..7c73917e44b220 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -15,6 +15,7 @@ Categorical, CategoricalIndex, DataFrame, + DatetimeIndex, Index, MultiIndex, Series, @@ -1556,6 +1557,42 @@ def test_loc_getitem_str_timedeltaindex(self): sliced = df.loc["0 days"] tm.assert_series_equal(sliced, expected) + @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) + def test_loc_getitem_partial_slice_non_monotonicity( + self, tz_aware_fixture, indexer_end, frame_or_series + ): + # GH#33146 + obj = frame_or_series( + [1] * 5, + index=DatetimeIndex( + [ + Timestamp("2019-12-30"), + Timestamp("2020-01-01"), + Timestamp("2019-12-25"), + Timestamp("2020-01-02 23:59:59.999999999"), + Timestamp("2019-12-19"), + ], + tz=tz_aware_fixture, + ), + ) + expected = frame_or_series( + [1] * 2, + index=DatetimeIndex( + [ + Timestamp("2020-01-01"), + Timestamp("2020-01-02 23:59:59.999999999"), + ], + tz=tz_aware_fixture, + ), + ) + indexer = slice("2020-01-01", indexer_end) + + result = obj[indexer] + tm.assert_equal(result, expected) + + result = obj.loc[indexer] + tm.assert_equal(result, expected) + class TestLabelSlicing: def test_loc_getitem_label_slice_across_dst(self): @@ -1652,7 +1689,7 @@ def test_loc_getitem_slice_columns_mixed_dtype(self): # GH: 20975 df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0]) expected = DataFrame( - data=[[2, 3]], index=[0], columns=pd.Index([1, 2], dtype=object) + data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object) ) tm.assert_frame_equal(df.loc[:, 1:], expected) @@ -1858,7 +1895,7 @@ def test_loc_set_dataframe_multiindex(): def test_loc_mixed_int_float(): # GH#19456 - ser = Series(range(2), pd.Index([1, 2.0], dtype=object)) + ser = Series(range(2), Index([1, 2.0], dtype=object)) result = ser.loc[1] assert result == 0