From 05d70f4e617a274813bdb02db69143b5554aa106 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 19:49:42 -0400 Subject: [PATCH] DOC: use shared docs on Index._convert_list_indexer (#15678) CLN: push key coercion to the indexes themselves to simplify a bit --- pandas/core/indexing.py | 86 ++++++++++---------------------------- pandas/indexes/base.py | 37 ++++++++++++++++ pandas/indexes/category.py | 19 ++++++--- pandas/indexes/multi.py | 33 +++++++++++++++ pandas/indexes/numeric.py | 1 + 5 files changed, 106 insertions(+), 70 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 546cbd8337e7e..19b7771251da3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -7,7 +7,6 @@ from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries from pandas.types.common import (is_integer_dtype, is_integer, is_float, - is_categorical_dtype, is_list_like, is_sequence, is_iterator, @@ -1087,51 +1086,24 @@ def _getitem_iterable(self, key, axis=0): inds, = key.nonzero() return self.obj.take(inds, axis=axis, convert=False) else: - if isinstance(key, Index): - keyarr = labels._convert_index_indexer(key) - else: - keyarr = _asarray_tuplesafe(key) - keyarr = labels._convert_arr_indexer(keyarr) - - if is_categorical_dtype(labels): - keyarr = labels._shallow_copy(keyarr) - - # have the index handle the indexer and possibly return - # an indexer or raising - indexer = labels._convert_list_indexer(keyarr, kind=self.name) + # Have the index compute an indexer or return None + # if it cannot handle + indexer, keyarr = labels._convert_listlike_indexer( + key, kind=self.name) if indexer is not None: return self.obj.take(indexer, axis=axis) - # this is not the most robust, but... - if (isinstance(labels, MultiIndex) and len(keyarr) and - not isinstance(keyarr[0], tuple)): - level = 0 - else: - level = None - # existing labels are unique and indexer are unique if labels.is_unique and Index(keyarr).is_unique: try: - result = self.obj.reindex_axis(keyarr, axis=axis, - level=level) - - # this is an error as we are trying to find - # keys in a multi-index that don't exist - if isinstance(labels, MultiIndex) and level is not None: - if (hasattr(result, 'ndim') and - not np.prod(result.shape) and len(keyarr)): - raise KeyError("cannot index a multi-index axis " - "with these keys") - - return result - + return self.obj.reindex_axis(keyarr, axis=axis) except AttributeError: # Series if axis != 0: raise AssertionError('axis must be 0') - return self.obj.reindex(keyarr, level=level) + return self.obj.reindex(keyarr) # existing labels are non-unique else: @@ -1225,49 +1197,33 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): if is_nested_tuple(obj, labels): return labels.get_locs(obj) + elif is_list_like_indexer(obj): + if is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) inds, = obj.nonzero() return inds else: - if isinstance(obj, Index): - # want Index objects to pass through untouched - objarr = obj - else: - objarr = _asarray_tuplesafe(obj) - # The index may want to handle a list indexer differently - # by returning an indexer or raising - indexer = labels._convert_list_indexer(objarr, kind=self.name) + # Have the index compute an indexer or return None + # if it cannot handle + indexer, objarr = labels._convert_listlike_indexer( + obj, kind=self.name) if indexer is not None: return indexer - # this is not the most robust, but... - if (isinstance(labels, MultiIndex) and - not isinstance(objarr[0], tuple)): - level = 0 - _, indexer = labels.reindex(objarr, level=level) + # unique index + if labels.is_unique: + indexer = check = labels.get_indexer(objarr) - # take all - if indexer is None: - indexer = np.arange(len(labels)) - - check = labels.levels[0].get_indexer(objarr) + # non-unique (dups) else: - level = None - - # unique index - if labels.is_unique: - indexer = check = labels.get_indexer(objarr) - - # non-unique (dups) - else: - (indexer, - missing) = labels.get_indexer_non_unique(objarr) - # 'indexer' has dupes, create 'check' using 'missing' - check = np.zeros_like(objarr) - check[missing] = -1 + (indexer, + missing) = labels.get_indexer_non_unique(objarr) + # 'indexer' has dupes, create 'check' using 'missing' + check = np.zeros_like(objarr) + check[missing] = -1 mask = check == -1 if mask.any(): diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 7f46f437489a1..5b942e2565c29 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1339,6 +1339,27 @@ def is_int(v): return indexer + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + if isinstance(keyarr, Index): + keyarr = self._convert_index_indexer(keyarr) + else: + keyarr = self._convert_arr_indexer(keyarr) + + indexer = self._convert_list_indexer(keyarr, kind=kind) + return indexer, keyarr + _index_shared_docs['_convert_arr_indexer'] = """ Convert an array-like indexer to the appropriate dtype. @@ -1354,6 +1375,7 @@ def is_int(v): @Appender(_index_shared_docs['_convert_arr_indexer']) def _convert_arr_indexer(self, keyarr): + keyarr = _asarray_tuplesafe(keyarr) return keyarr _index_shared_docs['_convert_index_indexer'] = """ @@ -1373,6 +1395,21 @@ def _convert_arr_indexer(self, keyarr): def _convert_index_indexer(self, keyarr): return keyarr + _index_shared_docs['_convert_list_indexer'] = """ + Convert a list-like indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : Index (or sub-class) + Indexer to convert. + kind : iloc, ix, loc, optional + + Returns + ------- + positional indexer or None + """ + + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): """ passed a key that is tuplesafe that is integer based diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 3d8f76fc56b01..923dd4ec785c5 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -18,6 +18,8 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.indexes.base as ibase +from pandas.core.common import _asarray_tuplesafe + _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) @@ -458,12 +460,10 @@ def get_indexer_non_unique(self, target): codes = self.categories.get_indexer(target) return self._engine.get_indexer_non_unique(codes) + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): - """ - we are passed a list indexer. - Return our indexer or raise if all of the values are not included in - the categories - """ + # Return our indexer or raise if all of the values are not included in + # the categories codes = self.categories.get_indexer(keyarr) if (codes == -1).any(): raise KeyError("a list-indexer must only include values that are " @@ -471,6 +471,15 @@ def _convert_list_indexer(self, keyarr, kind=None): return None + @Appender(_index_shared_docs['_convert_arr_indexer']) + def _convert_arr_indexer(self, keyarr): + keyarr = _asarray_tuplesafe(keyarr) + return self._shallow_copy(keyarr) + + @Appender(_index_shared_docs['_convert_index_indexer']) + def _convert_index_indexer(self, keyarr): + return self._shallow_copy(keyarr) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index bca1db83b6645..1c1609fed1dd1 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1568,6 +1568,39 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): return new_index, indexer + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer( + keyarr, kind=kind) + + # are we indexing a specific level + if indexer is None and len(keyarr) and not isinstance(keyarr[0], + tuple): + level = 0 + _, indexer = self.reindex(keyarr, level=level) + + # take all + if indexer is None: + indexer = np.arange(len(self)) + + check = self.levels[0].get_indexer(keyarr) + mask = check == -1 + if mask.any(): + raise KeyError('%s not in index' % keyarr[mask]) + + return indexer, keyarr + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 9bb70feb2501f..2f897c81975c2 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -203,6 +203,7 @@ def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so # that the values returned from indexing are # also uint64. + keyarr = _asarray_tuplesafe(keyarr) if is_integer_dtype(keyarr): return _asarray_tuplesafe(keyarr, dtype=np.uint64) return keyarr