BUG: indexing changes to .loc for compat to .ix for several situations

handle iterator; handle NamedTuple; .loc returns scalar selection dtypes correctly, closes pandas-dev#11617, xref pandas-dev#15113
jreback · Jan 12, 2017 · 801c8d9 · 801c8d9
1 parent 0fe491d
commit 801c8d9
Show file tree

Hide file tree

Showing 3 changed files with 102 additions and 9 deletions.
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -244,7 +244,7 @@ Other API Changes
 - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
 - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
 - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`)
-
+- ``.loc`` is now compatible with ``.ix`` in accepting iterators and NamedTuples (:issue:`15120`)
 - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
 - ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
 - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
@@ -318,7 +318,7 @@ Bug Fixes
 - Bug in ``Series`` construction with a datetimetz (:issue:`14928`)
 
 - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`)
-
+- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`)
 
 
 

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -9,6 +9,7 @@
                                  is_categorical_dtype,
                                  is_list_like,
                                  is_sequence,
+                                 is_iterator,
                                  is_scalar,
                                  is_sparse,
                                  _is_unorderable_exception,
@@ -1300,17 +1301,24 @@ class _LocationIndexer(_NDFrameIndexer):
     _exception = Exception
 
     def __getitem__(self, key):
-        if isinstance(key, tuple):
-            key = tuple(com._apply_if_callable(x, self.obj) for x in key)
-        else:
-            # scalar callable may return tuple
-            key = com._apply_if_callable(key, self.obj)
-
         if type(key) is tuple:
+            key = tuple(com._apply_if_callable(x, self.obj) for x in key)
+            try:
+                if self._is_scalar_access(key):
+                    return self._getitem_scalar(key)
+            except (KeyError, IndexError):
+                pass
             return self._getitem_tuple(key)
         else:
+            key = com._apply_if_callable(key, self.obj)
             return self._getitem_axis(key, axis=0)
 
+    def _is_scalar_access(self, key):
+        raise NotImplementedError()
+
+    def _getitem_scalar(self, key):
+        raise NotImplementedError()
+
     def _getitem_axis(self, key, axis=0):
         raise NotImplementedError()
 
@@ -1389,7 +1397,8 @@ def _has_valid_type(self, key, axis):
                 return True
 
             # TODO: don't check the entire key unless necessary
-            if len(key) and np.all(ax.get_indexer_for(key) < 0):
+            if (not is_iterator(key) and len(key) and
+                    np.all(ax.get_indexer_for(key) < 0)):
 
                 raise KeyError("None of [%s] are in the [%s]" %
                                (key, self.obj._get_axis_name(axis)))
@@ -1420,6 +1429,36 @@ def error():
 
         return True
 
+    def _is_scalar_access(self, key):
+        # this is a shortcut accessor for both .loc and .iloc
+        # that provides the equivalent access of .at and .iat, in order to
+        # a) avoid getting things via sections (to minimize dtype changes)
+        # b) provide a performant path
+        if not hasattr(key, '__len__'):
+            return False
+
+        if len(key) != self.ndim:
+            return False
+
+        for i, k in enumerate(key):
+            if not is_scalar(k):
+                return False
+
+            ax = self.obj.axes[i]
+            if isinstance(ax, MultiIndex):
+                return False
+
+            if not ax.is_unique:
+                return False
+
+        return True
+
+    def _getitem_scalar(self, key):
+        # a fast-path to scalar access
+        # if not, raise
+        values = self.obj.get_value(*key)
+        return values
+
     def _get_partial_string_timestamp_match_key(self, key, labels):
         """Translate any partial string timestamp matches in key, returning the
         new key (GH 10331)"""
@@ -1536,6 +1575,33 @@ def _has_valid_type(self, key, axis):
     def _has_valid_setitem_indexer(self, indexer):
         self._has_valid_positional_setitem_indexer(indexer)
 
+    def _is_scalar_access(self, key):
+        # this is a shortcut accessor for both .loc and .iloc
+        # that provides the equivalent access of .at and .iat, in order to
+        # a) avoid getting things via sections (to minimize dtype changes)
+        # b) provide a performant path
+        if not hasattr(key, '__len__'):
+            return False
+
+        if len(key) != self.ndim:
+            return False
+
+        for i, k in enumerate(key):
+            if not is_integer(k):
+                return False
+
+            ax = self.obj.axes[i]
+            if not ax.is_unique:
+                return False
+
+        return True
+
+    def _getitem_scalar(self, key):
+        # a fast-path to scalar access
+        # if not, raise
+        values = self.obj.get_value(*key, takeable=True)
+        return values
+
     def _is_valid_integer(self, key, axis):
         # return a boolean if we have a valid integer indexer
 

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -93,6 +93,11 @@ def test_getitem_iterator(self):
         expected = self.frame.ix[:, ['A', 'B', 'C']]
         assert_frame_equal(result, expected)
 
+        idx = iter(['A', 'B', 'C'])
+        result = self.frame.loc[:, idx]
+        expected = self.frame.loc[:, ['A', 'B', 'C']]
+        assert_frame_equal(result, expected)
+
     def test_getitem_list(self):
         self.frame.columns.name = 'foo'
 
@@ -1667,6 +1672,24 @@ def test_single_element_ix_dont_upcast(self):
         result = self.frame.ix[self.frame.index[5], 'E']
         self.assertTrue(is_integer(result))
 
+        result = self.frame.loc[self.frame.index[5], 'E']
+        self.assertTrue(is_integer(result))
+
+        # GH 11617
+        df = pd.DataFrame(dict(a=[1.23]))
+        df["b"] = 666
+
+        result = df.ix[0, "b"]
+        self.assertTrue(is_integer(result))
+        result = df.loc[0, "b"]
+        self.assertTrue(is_integer(result))
+
+        expected = Series([666], [0], name='b')
+        result = df.ix[[0], "b"]
+        assert_series_equal(result, expected)
+        result = df.loc[[0], "b"]
+        assert_series_equal(result, expected)
+
     def test_irow(self):
         df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))
 
@@ -2159,9 +2182,13 @@ def test_index_namedtuple(self):
         index = Index([idx1, idx2],
                       name="composite_index", tupleize_cols=False)
         df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
+
         result = df.ix[IndexType("foo", "bar")]["A"]
         self.assertEqual(result, 1)
 
+        result = df.loc[IndexType("foo", "bar")]["A"]
+        self.assertEqual(result, 1)
+
     def test_boolean_indexing(self):
         idx = lrange(3)
         cols = ['A', 'B', 'C']