Skip to content

Commit

Permalink
BUG: do not raise UnsortedIndexError if sorting is not required
Browse files Browse the repository at this point in the history
closes pandas-dev#16734

Author: Pietro Battiston <me@pietrobattiston.it>

This patch had conflicts when merged, resolved by
Committer: Jeff Reback <jeff.reback@twosigma.com>

Closes pandas-dev#16736 from toobaz/index_what_you_can and squashes the following commits:

f77e2b3 [Pietro Battiston] BUG: do not raise UnsortedIndexError if sorting is not required
  • Loading branch information
toobaz authored and jreback committed Jun 21, 2017
1 parent 25e0576 commit 8a98f5e
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 18 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ Indexing
^^^^^^^^

- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).


I/O
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,13 @@ def is_null_slice(obj):
obj.stop is None and obj.step is None)


def is_true_slices(l):
"""
Find non-trivial slices in "l": return a list of booleans with same length.
"""
return [isinstance(k, slice) and not is_null_slice(k) for k in l]


def is_full_slice(obj, l):
""" we have a full length slice """
return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and
Expand Down
21 changes: 8 additions & 13 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from pandas.errors import PerformanceWarning, UnsortedIndexError
from pandas.core.common import (_values_from_object,
is_bool_indexer,
is_null_slice)
is_null_slice,
is_true_slices)

import pandas.core.base as base
from pandas.util._decorators import (Appender, cache_readonly,
Expand Down Expand Up @@ -1035,12 +1036,6 @@ def is_lexsorted(self):
"""
return self.lexsort_depth == self.nlevels

def is_lexsorted_for_tuple(self, tup):
"""
Return True if we are correctly lexsorted given the passed tuple
"""
return len(tup) <= self.lexsort_depth

@cache_readonly
def lexsort_depth(self):
if self.sortorder is not None:
Expand Down Expand Up @@ -2262,12 +2257,12 @@ def get_locs(self, tup):
"""

# must be lexsorted to at least as many levels
if not self.is_lexsorted_for_tuple(tup):
raise UnsortedIndexError('MultiIndex Slicing requires the index '
'to be fully lexsorted tuple len ({0}), '
'lexsort depth ({1})'
.format(len(tup), self.lexsort_depth))

true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s]
if true_slices and true_slices[-1] >= self.lexsort_depth:
raise UnsortedIndexError('MultiIndex slicing requires the index '
'to be lexsorted: slicing on levels {0}, '
'lexsort depth {1}'
.format(true_slices, self.lexsort_depth))
# indexer
# this is the list of all values that we want to select
n = len(self)
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2826,8 +2826,13 @@ def test_unsortedindex(self):
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
columns=['one', 'two'])

# GH 16734: not sorted, but no real slicing
result = df.loc(axis=0)['z', 'a']
expected = df.iloc[0]
tm.assert_series_equal(result, expected)

with pytest.raises(UnsortedIndexError):
df.loc(axis=0)['z', :]
df.loc(axis=0)['z', slice('a')]
df.sort_index(inplace=True)
assert len(df.loc(axis=0)['z', :]) == 2

Expand Down
17 changes: 13 additions & 4 deletions pandas/tests/indexing/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,9 +817,13 @@ def f():
assert df.index.lexsort_depth == 0
with tm.assert_raises_regex(
UnsortedIndexError,
'MultiIndex Slicing requires the index to be fully '
r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
'MultiIndex slicing requires the index to be '
r'lexsorted: slicing on levels \[1\], lexsort depth 0'):
df.loc[(slice(None), slice('bar')), :]

# GH 16734: not sorted, but no real slicing
result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
tm.assert_frame_equal(result, df.iloc[[1, 3], :])

def test_multiindex_slicers_non_unique(self):

Expand Down Expand Up @@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self):

# not sorted
def f():
df.loc['A1', (slice(None), 'foo')]
df.loc['A1', ('a', slice('foo'))]

pytest.raises(UnsortedIndexError, f)

# GH 16734: not sorted, but no real slicing
tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
df.loc['A1'].iloc[:, [0, 2]])

df = df.sort_index(axis=1)

# slicing
Expand Down

0 comments on commit 8a98f5e

Please sign in to comment.