From a2f670c76e73ffe28d5e03c96e8f87c4477ab90e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 25 Jul 2019 15:12:48 -0700 Subject: [PATCH] CLN: Prune unnecessary indexing code (#27576) --- pandas/core/frame.py | 10 ++-- pandas/core/generic.py | 47 ++++++++--------- pandas/core/indexes/multi.py | 49 ++++++++++++++---- pandas/core/indexing.py | 70 ++++++++++++-------------- pandas/core/series.py | 17 +------ pandas/tests/indexing/test_indexing.py | 3 +- 6 files changed, 99 insertions(+), 97 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 34025d942b377..cdbe0e9d22eb4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -94,12 +94,9 @@ ) from pandas.core.indexes import base as ibase from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.multi import maybe_droplevels from pandas.core.indexes.period import PeriodIndex -from pandas.core.indexing import ( - check_bool_indexer, - convert_to_index_sliceable, - maybe_droplevels, -) +from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( arrays_to_mgr, @@ -2794,8 +2791,6 @@ def _ixs(self, i: int, axis: int = 0): if axis == 0: label = self.index[i] new_values = self._data.fast_xs(i) - if is_scalar(new_values): - return new_values # if we are a copy, mark as such copy = isinstance(new_values, np.ndarray) and new_values.base is None @@ -2906,6 +2901,7 @@ def _getitem_bool_array(self, key): return self.take(indexer, axis=0) def _getitem_multilevel(self, key): + # self.columns is a MultiIndex loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): new_columns = self.columns[loc] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c0d4baef6cebe..b900e1e82255d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3296,11 +3296,8 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True): if clear: self._clear_item_cache() - def _clear_item_cache(self, i=None): - if i is not None: - self._item_cache.pop(i, None) - else: - self._item_cache.clear() + def _clear_item_cache(self): + self._item_cache.clear() # ---------------------------------------------------------------------- # Indexing Methods @@ -3559,27 +3556,8 @@ class animal locomotion _xs = xs # type: Callable - def get(self, key, default=None): - """ - Get item from object for given key (ex: DataFrame column). - - Returns default value if not found. - - Parameters - ---------- - key : object - - Returns - ------- - value : same type as items contained in object - """ - try: - return self[key] - except (KeyError, ValueError, IndexError): - return default - def __getitem__(self, item): - return self._get_item_cache(item) + raise AbstractMethodError(self) def _get_item_cache(self, item): """Return the cached item, item represents a label indexer.""" @@ -3770,6 +3748,25 @@ def __delitem__(self, key): # ---------------------------------------------------------------------- # Unsorted + def get(self, key, default=None): + """ + Get item from object for given key (ex: DataFrame column). + + Returns default value if not found. + + Parameters + ---------- + key : object + + Returns + ------- + value : same type as items contained in object + """ + try: + return self[key] + except (KeyError, ValueError, IndexError): + return default + @property def _is_view(self): """Return boolean indicating if self is view of another array """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0123230bc0f63..a7c3449615299 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1470,9 +1470,6 @@ def dropna(self, how="any"): return self.copy(codes=new_codes, deep=True) def get_value(self, series, key): - # somewhat broken encapsulation - from pandas.core.indexing import maybe_droplevels - # Label-based s = com.values_from_object(series) k = com.values_from_object(key) @@ -2709,7 +2706,7 @@ def _maybe_to_slice(loc): return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop) - def get_loc_level(self, key, level=0, drop_level=True): + def get_loc_level(self, key, level=0, drop_level: bool = True): """ Get both the location for the requested label(s) and the resulting sliced index. @@ -2750,7 +2747,8 @@ def get_loc_level(self, key, level=0, drop_level=True): (1, None) """ - def maybe_droplevels(indexer, levels, drop_level): + # different name to distinguish from maybe_droplevels + def maybe_mi_droplevels(indexer, levels, drop_level: bool): if not drop_level: return self[indexer] # kludgearound @@ -2780,7 +2778,7 @@ def maybe_droplevels(indexer, levels, drop_level): result = loc if result is None else result & loc - return result, maybe_droplevels(result, level, drop_level) + return result, maybe_mi_droplevels(result, level, drop_level) level = self._get_level_number(level) @@ -2793,7 +2791,7 @@ def maybe_droplevels(indexer, levels, drop_level): try: if key in self.levels[0]: indexer = self._get_level_indexer(key, level=level) - new_index = maybe_droplevels(indexer, [0], drop_level) + new_index = maybe_mi_droplevels(indexer, [0], drop_level) return indexer, new_index except TypeError: pass @@ -2808,7 +2806,7 @@ def partial_selection(key, indexer=None): ilevels = [ i for i in range(len(key)) if key[i] != slice(None, None) ] - return indexer, maybe_droplevels(indexer, ilevels, drop_level) + return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) if len(key) == self.nlevels and self.is_unique: # Complete key in unique index -> standard get_loc @@ -2843,10 +2841,10 @@ def partial_selection(key, indexer=None): if indexer is None: indexer = slice(None, None) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, maybe_droplevels(indexer, ilevels, drop_level) + return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) else: indexer = self._get_level_indexer(key, level=level) - return indexer, maybe_droplevels(indexer, [level], drop_level) + return indexer, maybe_mi_droplevels(indexer, [level], drop_level) def _get_level_indexer(self, key, level=0, indexer=None): # return an indexer, boolean array or a slice showing where the key is @@ -3464,3 +3462,34 @@ def _sparsify(label_list, start=0, sentinel=""): def _get_na_rep(dtype): return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") + + +def maybe_droplevels(index, key): + """ + Attempt to drop level or levels from the given index. + + Parameters + ---------- + index: Index + key : scalar or tuple + + Returns + ------- + Index + """ + # drop levels + original_index = index + if isinstance(key, tuple): + for _ in key: + try: + index = index.droplevel(0) + except ValueError: + # we have dropped too much, so back out + return original_index + else: + try: + index = index.droplevel(0) + except ValueError: + pass + + return index diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1d4ea54ef0d70..cb33044c4e23f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -226,7 +226,7 @@ def _validate_key(self, key, axis: int): def _has_valid_tuple(self, key: Tuple): """ check the key for valid keys across my indexer """ for i, k in enumerate(key): - if i >= self.obj.ndim: + if i >= self.ndim: raise IndexingError("Too many indexers") try: self._validate_key(k, i) @@ -254,7 +254,7 @@ def _convert_tuple(self, key, is_setter: bool = False): keyidx.append(slice(None)) else: for i, k in enumerate(key): - if i >= self.obj.ndim: + if i >= self.ndim: raise IndexingError("Too many indexers") idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter) keyidx.append(idx) @@ -286,7 +286,7 @@ def _has_valid_positional_setitem_indexer(self, indexer): raise IndexError("{0} cannot enlarge its target object".format(self.name)) else: if not isinstance(indexer, tuple): - indexer = self._tuplify(indexer) + indexer = _tuplify(self.ndim, indexer) for ax, i in zip(self.obj.axes, indexer): if isinstance(i, slice): # should check the stop slice? @@ -401,7 +401,7 @@ def _setitem_with_indexer(self, indexer, value): assert info_axis == 1 if not isinstance(indexer, tuple): - indexer = self._tuplify(indexer) + indexer = _tuplify(self.ndim, indexer) if isinstance(value, ABCSeries): value = self._align_series(indexer, value) @@ -670,7 +670,7 @@ def ravel(i): aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 - is_frame = self.obj.ndim == 2 + is_frame = self.ndim == 2 obj = self.obj # are we a single alignable value on a non-primary @@ -731,7 +731,7 @@ def ravel(i): raise ValueError("Incompatible indexer with Series") def _align_frame(self, indexer, df: ABCDataFrame): - is_frame = self.obj.ndim == 2 + is_frame = self.ndim == 2 if isinstance(indexer, tuple): @@ -867,7 +867,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): except KeyError as ek: # raise KeyError if number of indexers match # else IndexingError will be raised - if len(tup) <= self.obj.index.nlevels and len(tup) > self.obj.ndim: + if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: raise ek except Exception as e1: if isinstance(tup[0], (slice, Index)): @@ -900,7 +900,7 @@ def _getitem_lowerdim(self, tup: Tuple): if result is not None: return result - if len(tup) > self.obj.ndim: + if len(tup) > self.ndim: raise IndexingError("Too many indexers. handle elsewhere") # to avoid wasted computation @@ -1274,11 +1274,6 @@ def _convert_to_indexer( return {"key": obj} raise - def _tuplify(self, loc): - tup = [slice(None, None) for _ in range(self.ndim)] - tup[0] = loc - return tuple(tup) - def _get_slice_axis(self, slice_obj: slice, axis: int): # caller is responsible for ensuring non-None axis obj = self.obj @@ -2077,6 +2072,8 @@ def _getitem_tuple(self, tup: Tuple): # if the dim was reduced, then pass a lower-dim the next time if retval.ndim < self.ndim: + # TODO: this is never reached in tests; can we confirm that + # it is impossible? axis -= 1 # try to get for the next axis @@ -2152,7 +2149,7 @@ def _convert_to_indexer( ) -class _ScalarAccessIndexer(_NDFrameIndexer): +class _ScalarAccessIndexer(_NDFrameIndexerBase): """ access scalars quickly """ def _convert_key(self, key, is_setter: bool = False): @@ -2178,8 +2175,8 @@ def __setitem__(self, key, value): key = com.apply_if_callable(key, self.obj) if not isinstance(key, tuple): - key = self._tuplify(key) - if len(key) != self.obj.ndim: + key = _tuplify(self.ndim, key) + if len(key) != self.ndim: raise ValueError("Not enough indexers for scalar access (setting)!") key = list(self._convert_key(key, is_setter=True)) key.append(value) @@ -2309,9 +2306,6 @@ class _iAtIndexer(_ScalarAccessIndexer): _takeable = True - def _has_valid_setitem_indexer(self, indexer): - self._has_valid_positional_setitem_indexer(indexer) - def _convert_key(self, key, is_setter: bool = False): """ require integer args (and convert to label arguments) """ for a, i in zip(self.obj.axes, key): @@ -2320,6 +2314,25 @@ def _convert_key(self, key, is_setter: bool = False): return key +def _tuplify(ndim: int, loc) -> tuple: + """ + Given an indexer for the first dimension, create an equivalent tuple + for indexing over all dimensions. + + Parameters + ---------- + ndim : int + loc : object + + Returns + ------- + tuple + """ + tup = [slice(None, None) for _ in range(ndim)] + tup[0] = loc + return tuple(tup) + + def convert_to_index_sliceable(obj, key): """ if we are index sliceable, then return my slicer, otherwise return None @@ -2469,25 +2482,6 @@ def need_slice(obj): ) -def maybe_droplevels(index, key): - # drop levels - original_index = index - if isinstance(key, tuple): - for _ in key: - try: - index = index.droplevel(0) - except ValueError: - # we have dropped too much, so back out - return original_index - else: - try: - index = index.droplevel(0) - except ValueError: - pass - - return index - - def _non_reducing_slice(slice_): """ Ensurse that a slice doesn't reduce to a Series or Scalar. diff --git a/pandas/core/series.py b/pandas/core/series.py index 592aa658556c7..59f39758cb3b0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -12,7 +12,7 @@ from pandas._config import get_option -from pandas._libs import iNaT, index as libindex, lib, reshape, tslibs +from pandas._libs import index as libindex, lib, reshape, tslibs from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, deprecate @@ -47,7 +47,6 @@ ABCSparseSeries, ) from pandas.core.dtypes.missing import ( - is_valid_nat_for_dtype, isna, na_value_for_dtype, notna, @@ -1237,20 +1236,6 @@ def setitem(key, value): elif key is Ellipsis: self[:] = value return - elif com.is_bool_indexer(key): - pass - elif is_timedelta64_dtype(self.dtype): - # reassign a null value to iNaT - if is_valid_nat_for_dtype(value, self.dtype): - # exclude np.datetime64("NaT") - value = iNaT - - try: - self.index._engine.set_value(self._values, key, value) - return - except (TypeError, ValueError): - # ValueError appears in only some builds in CI - pass self.loc[key] = value return diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 8d1971801cccc..e375bd459e66f 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -8,6 +8,7 @@ import pytest from pandas.compat import PY36 +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype @@ -1168,7 +1169,7 @@ def test_extension_array_cross_section_converts(): @pytest.mark.parametrize( "idxr, error, error_message", [ - (lambda x: x, AttributeError, "'numpy.ndarray' object has no attribute 'get'"), + (lambda x: x, AbstractMethodError, None), ( lambda x: x.loc, AttributeError,