Skip to content

Commit

Permalink
BUG: loc returning wrong elements for non-monotonic DatetimeIndex (pa…
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored and luckyvs1 committed Jan 20, 2021
1 parent 3aac68e commit a7a6fd4
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 37 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`)
- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`)
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
-
Expand Down
72 changes: 37 additions & 35 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,42 +776,44 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
if isinstance(end, date) and not isinstance(end, datetime):
end = datetime.combine(end, time(0, 0))

try:
def check_str_or_none(point):
return point is not None and not isinstance(point, str)

# GH#33146 if start and end are combinations of str and None and Index is not
# monotonic, we can not use Index.slice_indexer because it does not honor the
# actual elements, is only searching for start and end
if (
check_str_or_none(start)
or check_str_or_none(end)
or self.is_monotonic_increasing
):
return Index.slice_indexer(self, start, end, step, kind=kind)
except KeyError:
# For historical reasons DatetimeIndex by default supports
# value-based partial (aka string) slices on non-monotonic arrays,
# let's try that.
if (start is None or isinstance(start, str)) and (
end is None or isinstance(end, str)
):
mask = np.array(True)
deprecation_mask = np.array(True)
if start is not None:
start_casted = self._maybe_cast_slice_bound(start, "left", kind)
mask = start_casted <= self
deprecation_mask = start_casted == self

if end is not None:
end_casted = self._maybe_cast_slice_bound(end, "right", kind)
mask = (self <= end_casted) & mask
deprecation_mask = (end_casted == self) | deprecation_mask

if not deprecation_mask.any():
warnings.warn(
"Value based partial slicing on non-monotonic DatetimeIndexes "
"with non-existing keys is deprecated and will raise a "
"KeyError in a future Version.",
FutureWarning,
stacklevel=5,
)
indexer = mask.nonzero()[0][::step]
if len(indexer) == len(self):
return slice(None)
else:
return indexer
else:
raise

mask = np.array(True)
deprecation_mask = np.array(True)
if start is not None:
start_casted = self._maybe_cast_slice_bound(start, "left", kind)
mask = start_casted <= self
deprecation_mask = start_casted == self

if end is not None:
end_casted = self._maybe_cast_slice_bound(end, "right", kind)
mask = (self <= end_casted) & mask
deprecation_mask = (end_casted == self) | deprecation_mask

if not deprecation_mask.any():
warnings.warn(
"Value based partial slicing on non-monotonic DatetimeIndexes "
"with non-existing keys is deprecated and will raise a "
"KeyError in a future Version.",
FutureWarning,
stacklevel=5,
)
indexer = mask.nonzero()[0][::step]
if len(indexer) == len(self):
return slice(None)
else:
return indexer

# --------------------------------------------------------------------

Expand Down
41 changes: 39 additions & 2 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
Expand Down Expand Up @@ -1556,6 +1557,42 @@ def test_loc_getitem_str_timedeltaindex(self):
sliced = df.loc["0 days"]
tm.assert_series_equal(sliced, expected)

@pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"])
def test_loc_getitem_partial_slice_non_monotonicity(
self, tz_aware_fixture, indexer_end, frame_or_series
):
# GH#33146
obj = frame_or_series(
[1] * 5,
index=DatetimeIndex(
[
Timestamp("2019-12-30"),
Timestamp("2020-01-01"),
Timestamp("2019-12-25"),
Timestamp("2020-01-02 23:59:59.999999999"),
Timestamp("2019-12-19"),
],
tz=tz_aware_fixture,
),
)
expected = frame_or_series(
[1] * 2,
index=DatetimeIndex(
[
Timestamp("2020-01-01"),
Timestamp("2020-01-02 23:59:59.999999999"),
],
tz=tz_aware_fixture,
),
)
indexer = slice("2020-01-01", indexer_end)

result = obj[indexer]
tm.assert_equal(result, expected)

result = obj.loc[indexer]
tm.assert_equal(result, expected)


class TestLabelSlicing:
def test_loc_getitem_label_slice_across_dst(self):
Expand Down Expand Up @@ -1652,7 +1689,7 @@ def test_loc_getitem_slice_columns_mixed_dtype(self):
# GH: 20975
df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0])
expected = DataFrame(
data=[[2, 3]], index=[0], columns=pd.Index([1, 2], dtype=object)
data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object)
)
tm.assert_frame_equal(df.loc[:, 1:], expected)

Expand Down Expand Up @@ -1858,7 +1895,7 @@ def test_loc_set_dataframe_multiindex():

def test_loc_mixed_int_float():
# GH#19456
ser = Series(range(2), pd.Index([1, 2.0], dtype=object))
ser = Series(range(2), Index([1, 2.0], dtype=object))

result = ser.loc[1]
assert result == 0
Expand Down

0 comments on commit a7a6fd4

Please sign in to comment.