DEPR: Deprecate convert parameter in take (pandas-dev#17352)
xref pandas-dev#16948. The parameter is not respected,
nor is it a parameter in many 'take' implementations.
gfyoung authored and No-Stream committed Nov 28, 2017
1 parent 01d1820 commit 5a998a8
Showing 11 changed files with 144 additions and 73 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
@@ -566,6 +566,7 @@ Deprecations
~~~~~~~~~~~~

- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`)
- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).
- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`)
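As a rough illustration of this release note, passing ``convert=False`` to ``.take()`` is now expected to emit a ``FutureWarning`` while the returned object stays the same, since the flag was never respected; a minimal sketch of the expected 0.21.0 behaviour:

import warnings

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Plain positional selection is unchanged.
expected = df.take([0, 2])

# Passing convert=False is deprecated: same result, plus a FutureWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    deprecated = df.take([0, 2], convert=False)

assert deprecated.equals(expected)
assert any(issubclass(w.category, FutureWarning) for w in caught)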
12 changes: 6 additions & 6 deletions pandas/core/frame.py
@@ -2034,7 +2034,7 @@ def _ixs(self, i, axis=0):
return self.loc[:, lab_slice]
else:
if isinstance(label, Index):
return self.take(i, axis=1, convert=True)
return self._take(i, axis=1, convert=True)

index_len = len(self.index)

@@ -2116,10 +2116,10 @@ def _getitem_array(self, key):
# be reindexed to match DataFrame rows
key = check_bool_indexer(self.index, key)
indexer = key.nonzero()[0]
return self.take(indexer, axis=0, convert=False)
return self._take(indexer, axis=0, convert=False)
else:
indexer = self.loc._convert_to_indexer(key, axis=1)
return self.take(indexer, axis=1, convert=True)
return self._take(indexer, axis=1, convert=True)

def _getitem_multilevel(self, key):
loc = self.columns.get_loc(key)
@@ -3355,7 +3355,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
else:
raise TypeError('must specify how or thresh')

result = self.take(mask.nonzero()[0], axis=axis, convert=False)
result = self._take(mask.nonzero()[0], axis=axis, convert=False)

if inplace:
self._update_inplace(result)
@@ -3486,7 +3486,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,

new_data = self._data.take(indexer,
axis=self._get_block_manager_axis(axis),
convert=False, verify=False)
verify=False)

if inplace:
return self._update_inplace(new_data)
@@ -3547,7 +3547,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
baxis = self._get_block_manager_axis(axis)
new_data = self._data.take(indexer,
axis=baxis,
convert=False, verify=False)
verify=False)

# reconstruct axis if needed
new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()
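The ``sort_values`` and ``sort_index`` hunks above now take the argsort indexer through the block manager without the ``convert`` flag; conceptually the reordering is just a positional take of that indexer, as in this illustrative sketch (not part of the commit):

import pandas as pd

df = pd.DataFrame({"a": [3, 1, 2]}, index=["x", "y", "z"])

# sort_values computes a positional indexer and then takes it along axis 0.
indexer = df["a"].values.argsort()
assert df.take(indexer).equals(df.sort_values("a"))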
96 changes: 77 additions & 19 deletions pandas/core/generic.py
@@ -38,6 +38,7 @@
from pandas.core.index import (Index, MultiIndex, _ensure_index,
InvalidIndexError)
import pandas.core.indexing as indexing
from pandas.core.indexing import maybe_convert_indices
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import PeriodIndex, Period
from pandas.core.internals import BlockManager
@@ -1822,7 +1823,8 @@ def _iget_item_cache(self, item):
if ax.is_unique:
lower = self._get_item_cache(ax[item])
else:
lower = self.take(item, axis=self._info_axis_number, convert=True)
lower = self._take(item, axis=self._info_axis_number,
convert=True)
return lower

def _box_item_values(self, key, values):
@@ -2057,8 +2059,63 @@ def __delitem__(self, key):
except KeyError:
pass

def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
_shared_docs['_take'] = """
Return the elements in the given *positional* indices along an axis.
This means that we are not indexing according to actual values in
the index attribute of the object. We are indexing according to the
actual position of the element in the object.
This is the internal version of ``.take()`` and will contain a wider
selection of parameters useful for internal use but not as suitable
for public usage.
Parameters
----------
indices : array-like
An array of ints indicating which positions to take.
axis : int, default 0
The axis on which to select elements. "0" means that we are
selecting rows, "1" means that we are selecting columns, etc.
convert : bool, default True
Whether to convert negative indices into positive ones.
For example, ``-1`` would map to the ``len(axis) - 1``.
The conversions are similar to the behavior of indexing a
regular Python list.
is_copy : bool, default True
Whether to return a copy of the original object or not.
Returns
-------
taken : type of caller
An array-like containing the elements taken from the object.
See Also
--------
numpy.ndarray.take
numpy.take
"""

@Appender(_shared_docs['_take'])
def _take(self, indices, axis=0, convert=True, is_copy=True):
self._consolidate_inplace()

if convert:
indices = maybe_convert_indices(indices, len(self._get_axis(axis)))

new_data = self._data.take(indices,
axis=self._get_block_manager_axis(axis),
verify=True)
result = self._constructor(new_data).__finalize__(self)

# Maybe set copy if we didn't actually change the index.
if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)

return result

_shared_docs['take'] = """
Return the elements in the given *positional* indices along an axis.
This means that we are not indexing according to actual values in
@@ -2073,9 +2130,12 @@ def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
The axis on which to select elements. "0" means that we are
selecting rows, "1" means that we are selecting columns, etc.
convert : bool, default True
Whether to convert negative indices to positive ones, just as with
indexing into Python lists. For example, if `-1` was passed in,
this index would be converted ``n - 1``.
.. deprecated:: 0.21.0
Whether to convert negative indices into positive ones.
For example, ``-1`` would map to the ``len(axis) - 1``.
The conversions are similar to the behavior of indexing a
regular Python list.
is_copy : bool, default True
Whether to return a copy of the original object or not.
@@ -2131,19 +2191,17 @@ class max_speed
numpy.ndarray.take
numpy.take
"""

@Appender(_shared_docs['take'])
def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
nv.validate_take(tuple(), kwargs)
self._consolidate_inplace()
new_data = self._data.take(indices,
axis=self._get_block_manager_axis(axis),
convert=True, verify=True)
result = self._constructor(new_data).__finalize__(self)

# maybe set copy if we didn't actually change the index
if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
if not convert:
msg = ("The 'convert' parameter is deprecated "
"and will be removed in a future version.")
warnings.warn(msg, FutureWarning, stacklevel=2)

return result
return self._take(indices, axis=axis, convert=convert, is_copy=is_copy)

def xs(self, key, axis=0, level=None, drop_level=True):
"""
@@ -2244,9 +2302,9 @@ def xs(self, key, axis=0, level=None, drop_level=True):
if isinstance(loc, np.ndarray):
if loc.dtype == np.bool_:
inds, = loc.nonzero()
return self.take(inds, axis=axis, convert=False)
return self._take(inds, axis=axis, convert=False)
else:
return self.take(loc, axis=axis, convert=True)
return self._take(loc, axis=axis, convert=True)

if not is_scalar(loc):
new_index = self.index[loc]
@@ -5112,7 +5170,7 @@ def at_time(self, time, asof=False):
"""
try:
indexer = self.index.indexer_at_time(time, asof=asof)
return self.take(indexer, convert=False)
return self._take(indexer, convert=False)
except AttributeError:
raise TypeError('Index must be DatetimeIndex')

@@ -5136,7 +5194,7 @@ def between_time(self, start_time, end_time, include_start=True,
indexer = self.index.indexer_between_time(
start_time, end_time, include_start=include_start,
include_end=include_end)
return self.take(indexer, convert=False)
return self._take(indexer, convert=False)
except AttributeError:
raise TypeError('Index must be DatetimeIndex')

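The ``convert`` flag on the new ``_take`` is what routes indices through ``maybe_convert_indices``, mapping negative positions onto ``len(axis) + i`` exactly as the docstring above describes; a simplified standalone stand-in (the helper name below is hypothetical, not the pandas implementation):

import numpy as np

def convert_negative_indices(indices, n):
    # Simplified stand-in for pandas.core.indexing.maybe_convert_indices:
    # negative positions count from the end, as with a regular Python list.
    indices = np.asarray(indices, dtype=np.intp)
    converted = np.where(indices < 0, indices + n, indices)
    if ((converted < 0) | (converted >= n)).any():
        raise IndexError("indices are out of bounds")
    return converted

print(convert_negative_indices([0, -1, -2], n=5))  # [0 4 3]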
10 changes: 5 additions & 5 deletions pandas/core/groupby.py
@@ -322,8 +322,8 @@ def _set_grouper(self, obj, sort=False):
# use stable sort to support first, last, nth
indexer = self.indexer = ax.argsort(kind='mergesort')
ax = ax.take(indexer)
obj = obj.take(indexer, axis=self.axis,
convert=False, is_copy=False)
obj = obj._take(indexer, axis=self.axis,
convert=False, is_copy=False)

self.obj = obj
self.grouper = ax
@@ -640,7 +640,7 @@ def get_group(self, name, obj=None):
if not len(inds):
raise KeyError(name)

return obj.take(inds, axis=self.axis, convert=False)
return obj._take(inds, axis=self.axis, convert=False)

def __iter__(self):
"""
@@ -2226,7 +2226,7 @@ def _aggregate_series_fast(self, obj, func):
# avoids object / Series creation overhead
dummy = obj._get_values(slice(None, 0)).to_dense()
indexer = get_group_index_sorter(group_index, ngroups)
obj = obj.take(indexer, convert=False).to_dense()
obj = obj._take(indexer, convert=False).to_dense()
group_index = algorithms.take_nd(
group_index, indexer, allow_fill=False)
grouper = lib.SeriesGrouper(obj, func, group_index, ngroups,
@@ -4523,7 +4523,7 @@ def __iter__(self):
yield i, self._chop(sdata, slice(start, end))

def _get_sorted_data(self):
return self.data.take(self.sort_idx, axis=self.axis, convert=False)
return self.data._take(self.sort_idx, axis=self.axis, convert=False)

def _chop(self, sdata, slice_obj):
return sdata.iloc[slice_obj]
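The grouper hunk keeps the "use stable sort" comment intact: stability is what preserves the original row order within each group after the positional take, which is what ``first``/``last``/``nth`` rely on; a small illustration (not from the commit):

import numpy as np

keys = np.array(["b", "a", "b", "a"])

# A stable sort keeps the first "b" (row 0) ahead of the second (row 2),
# so nth(0) on the sorted data still sees the earlier row of each group.
indexer = np.argsort(keys, kind="mergesort")
print(indexer)  # [1 3 0 2]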
18 changes: 9 additions & 9 deletions pandas/core/indexing.py
@@ -1093,7 +1093,7 @@ def _getitem_iterable(self, key, axis=0):
if is_bool_indexer(key):
key = check_bool_indexer(labels, key)
inds, = key.nonzero()
return self.obj.take(inds, axis=axis, convert=False)
return self.obj._take(inds, axis=axis, convert=False)
else:
# Have the index compute an indexer or return None
# if it cannot handle; we only act on all found values
@@ -1126,15 +1126,15 @@ def _getitem_iterable(self, key, axis=0):
keyarr)

if new_indexer is not None:
result = self.obj.take(indexer[indexer != -1], axis=axis,
convert=False)
result = self.obj._take(indexer[indexer != -1], axis=axis,
convert=False)

result = result._reindex_with_indexers(
{axis: [new_target, new_indexer]},
copy=True, allow_dups=True)

else:
result = self.obj.take(indexer, axis=axis, convert=False)
result = self.obj._take(indexer, axis=axis)

return result

@@ -1265,7 +1265,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
if isinstance(indexer, slice):
return self._slice(indexer, axis=axis, kind='iloc')
else:
return self.obj.take(indexer, axis=axis, convert=False)
return self.obj._take(indexer, axis=axis, convert=False)


class _IXIndexer(_NDFrameIndexer):
@@ -1350,7 +1350,7 @@ def _getbool_axis(self, key, axis=0):
key = check_bool_indexer(labels, key)
inds, = key.nonzero()
try:
return self.obj.take(inds, axis=axis, convert=False)
return self.obj._take(inds, axis=axis, convert=False)
except Exception as detail:
raise self._exception(detail)

@@ -1367,7 +1367,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
if isinstance(indexer, slice):
return self._slice(indexer, axis=axis, kind='iloc')
else:
return self.obj.take(indexer, axis=axis, convert=False)
return self.obj._take(indexer, axis=axis, convert=False)


class _LocIndexer(_LocationIndexer):
@@ -1707,7 +1707,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
if isinstance(slice_obj, slice):
return self._slice(slice_obj, axis=axis, kind='iloc')
else:
return self.obj.take(slice_obj, axis=axis, convert=False)
return self.obj._take(slice_obj, axis=axis, convert=False)

def _get_list_axis(self, key, axis=0):
"""
@@ -1723,7 +1723,7 @@ def _get_list_axis(self, key, axis=0):
Series object
"""
try:
return self.obj.take(key, axis=axis, convert=False)
return self.obj._take(key, axis=axis, convert=False)
except IndexError:
# re-raise with different error message
raise IndexError("positional indexers are out-of-bounds")
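Most of these call sites first turn a boolean mask into positional indices with ``nonzero()``, so the indexer is already non-negative and in bounds and skipping the conversion is safe; a brief sketch of that equivalence, illustrative only:

import numpy as np
import pandas as pd

s = pd.Series([10, 20, 30, 40])
mask = np.array([True, False, True, False])

inds, = mask.nonzero()            # array([0, 2]): already valid positions
assert s.take(inds).equals(s[mask])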
35 changes: 12 additions & 23 deletions pandas/core/series.py
@@ -2563,35 +2563,24 @@ def memory_usage(self, index=True, deep=False):
v += self.index.memory_usage(deep=deep)
return v

def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs):
"""
return Series corresponding to requested indices
Parameters
----------
indices : list / array of ints
convert : translate negative to positive indices (default)
Returns
-------
taken : Series
See also
--------
numpy.ndarray.take
"""
if kwargs:
nv.validate_take(tuple(), kwargs)

# check/convert indicies here
@Appender(generic._shared_docs['_take'])
def _take(self, indices, axis=0, convert=True, is_copy=False):
if convert:
indices = maybe_convert_indices(indices, len(self._get_axis(axis)))

indices = _ensure_platform_int(indices)
new_index = self.index.take(indices)
new_values = self._values.take(indices)
return (self._constructor(new_values, index=new_index, fastpath=True)
.__finalize__(self))

result = (self._constructor(new_values, index=new_index,
fastpath=True).__finalize__(self))

# Maybe set copy if we didn't actually change the index.
if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)

return result

def isin(self, values):
"""
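For ``Series``, ``_take`` is simply a paired positional take on the index and on the values; a quick usage sketch of the semantics described in the shared docstring, including the negative-index conversion:

import pandas as pd

s = pd.Series([10, 20, 30], index=["a", "b", "c"])

# Positional, not label-based: -1 converts to the last position.
print(s.take([0, -1]))
# a    10
# c    30
# dtype: int64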
13 changes: 6 additions & 7 deletions pandas/core/sparse/series.py
@@ -602,16 +602,15 @@ def sparse_reindex(self, new_index):
sparse_index=new_index,
fill_value=self.fill_value).__finalize__(self)

@Appender(generic._shared_docs['take'])
def take(self, indices, axis=0, convert=True, *args, **kwargs):
"""
Sparse-compatible version of ndarray.take
convert = nv.validate_take_with_convert(convert, args, kwargs)

Returns
-------
taken : ndarray
"""
if not convert:
msg = ("The 'convert' parameter is deprecated "
"and will be removed in a future version.")
warnings.warn(msg, FutureWarning, stacklevel=2)

convert = nv.validate_take_with_convert(convert, args, kwargs)
new_values = SparseArray.take(self.values, indices)
new_index = self.index.take(indices)
return self._constructor(new_values,
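``SparseSeries.take`` follows the same deprecation path: ``convert=False`` is expected to warn with a ``FutureWarning`` while still delegating to ``SparseArray.take``; a hedged usage sketch:

import warnings

import pandas as pd

sp = pd.Series([1.0, None, 3.0]).to_sparse()

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    taken = sp.take([0, 2], convert=False)

print(taken.to_dense().tolist())  # [1.0, 3.0]
assert any(issubclass(w.category, FutureWarning) for w in caught)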