Skip to content

Commit

Permalink
BUG: indexing changes to .loc for compat to .ix for several situations
Browse files Browse the repository at this point in the history
handle iterator
handle NamedTuple
.loc returns scalar selection dtypes correctly, closes pandas-dev#11617

xref pandas-dev#15113
  • Loading branch information
jreback committed Jan 12, 2017
1 parent 0fe491d commit 801c8d9
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 9 deletions.
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ Other API Changes
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
- ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`)

- ``.loc`` now accepts iterators and NamedTuples, for compatibility with ``.ix`` (:issue:`15120`)
- ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
Expand Down Expand Up @@ -318,7 +318,7 @@ Bug Fixes
- Bug in ``Series`` construction with a datetimetz (:issue:`14928`)

- Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`)

- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`)



Expand Down
80 changes: 73 additions & 7 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
is_categorical_dtype,
is_list_like,
is_sequence,
is_iterator,
is_scalar,
is_sparse,
_is_unorderable_exception,
Expand Down Expand Up @@ -1300,17 +1301,24 @@ class _LocationIndexer(_NDFrameIndexer):
_exception = Exception

def __getitem__(self, key):
    """Dispatch a ``[]`` lookup to the scalar, tuple or single-axis path.

    Parameters
    ----------
    key : object
        The indexer passed inside the brackets. Callables (or callables
        inside a tuple key) are resolved against ``self.obj`` first.

    Returns
    -------
    Whatever the selected path returns: ``_getitem_scalar`` for an
    all-scalar tuple key, ``_getitem_tuple`` for any other tuple key, and
    ``_getitem_axis`` on axis 0 for everything else.
    """
    # Exact type check on purpose: tuple *subclasses* (e.g. a NamedTuple
    # used as a single label) must not be unpacked element-wise, so they
    # fall through to the single-axis path below.
    if type(key) is tuple:
        key = tuple(com._apply_if_callable(x, self.obj) for x in key)
        try:
            if self._is_scalar_access(key):
                return self._getitem_scalar(key)
        except (KeyError, IndexError):
            # the scalar fast path could not resolve the key; let the
            # general tuple path handle it (and raise if appropriate)
            pass
        return self._getitem_tuple(key)

    key = com._apply_if_callable(key, self.obj)
    return self._getitem_axis(key, axis=0)

def _is_scalar_access(self, key):
raise NotImplementedError()

def _getitem_scalar(self, key):
raise NotImplementedError()

def _getitem_axis(self, key, axis=0):
raise NotImplementedError()

Expand Down Expand Up @@ -1389,7 +1397,8 @@ def _has_valid_type(self, key, axis):
return True

# TODO: don't check the entire key unless necessary
if len(key) and np.all(ax.get_indexer_for(key) < 0):
if (not is_iterator(key) and len(key) and
np.all(ax.get_indexer_for(key) < 0)):

raise KeyError("None of [%s] are in the [%s]" %
(key, self.obj._get_axis_name(axis)))
Expand Down Expand Up @@ -1420,6 +1429,36 @@ def error():

return True

def _is_scalar_access(self, key):
# this is a shortcut accessor to both .loc and .iloc
# that provide the equivalent access of .at and .iat
# a) avoid getting things via sections and (to minimize dtype changes)
# b) provide a performant path
if not hasattr(key, '__len__'):
return False

if len(key) != self.ndim:
return False

for i, k in enumerate(key):
if not is_scalar(k):
return False

ax = self.obj.axes[i]
if isinstance(ax, MultiIndex):
return False

if not ax.is_unique:
return False

return True

def _getitem_scalar(self, key):
# a fast-path to scalar access
# if not, raise
values = self.obj.get_value(*key)
return values

def _get_partial_string_timestamp_match_key(self, key, labels):
"""Translate any partial string timestamp matches in key, returning the
new key (GH 10331)"""
Expand Down Expand Up @@ -1536,6 +1575,33 @@ def _has_valid_type(self, key, axis):
def _has_valid_setitem_indexer(self, indexer):
self._has_valid_positional_setitem_indexer(indexer)

def _is_scalar_access(self, key):
# this is a shortcut accessor to both .loc and .iloc
# that provide the equivalent access of .at and .iat
# a) avoid getting things via sections and (to minimize dtype changes)
# b) provide a performant path
if not hasattr(key, '__len__'):
return False

if len(key) != self.ndim:
return False

for i, k in enumerate(key):
if not is_integer(k):
return False

ax = self.obj.axes[i]
if not ax.is_unique:
return False

return True

def _getitem_scalar(self, key):
# a fast-path to scalar access
# if not, raise
values = self.obj.get_value(*key, takeable=True)
return values

def _is_valid_integer(self, key, axis):
# return a boolean if we have a valid integer indexer

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def test_getitem_iterator(self):
expected = self.frame.ix[:, ['A', 'B', 'C']]
assert_frame_equal(result, expected)

idx = iter(['A', 'B', 'C'])
result = self.frame.loc[:, idx]
expected = self.frame.loc[:, ['A', 'B', 'C']]
assert_frame_equal(result, expected)

def test_getitem_list(self):
self.frame.columns.name = 'foo'

Expand Down Expand Up @@ -1667,6 +1672,24 @@ def test_single_element_ix_dont_upcast(self):
result = self.frame.ix[self.frame.index[5], 'E']
self.assertTrue(is_integer(result))

result = self.frame.loc[self.frame.index[5], 'E']
self.assertTrue(is_integer(result))

# GH 11617
df = pd.DataFrame(dict(a=[1.23]))
df["b"] = 666

result = df.ix[0, "b"]
self.assertTrue(is_integer(result))
result = df.loc[0, "b"]
self.assertTrue(is_integer(result))

expected = Series([666], [0], name='b')
result = df.ix[[0], "b"]
assert_series_equal(result, expected)
result = df.loc[[0], "b"]
assert_series_equal(result, expected)

def test_irow(self):
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))

Expand Down Expand Up @@ -2159,9 +2182,13 @@ def test_index_namedtuple(self):
index = Index([idx1, idx2],
name="composite_index", tupleize_cols=False)
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])

result = df.ix[IndexType("foo", "bar")]["A"]
self.assertEqual(result, 1)

result = df.loc[IndexType("foo", "bar")]["A"]
self.assertEqual(result, 1)

def test_boolean_indexing(self):
idx = lrange(3)
cols = ['A', 'B', 'C']
Expand Down

0 comments on commit 801c8d9

Please sign in to comment.