diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index de2516d75040b..36f3db98a39b5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -53,6 +53,7 @@ Backwards incompatible API changes - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +- ``Index.get_indexer_non_unique()`` now returns an ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) .. _whatsnew_0210.api: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c4b3e25acae7e..daf3381ae4e89 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -896,8 +896,9 @@ def reset_identity(values): # we can't reindex, so we resort to this # GH 14776 if isinstance(ax, MultiIndex) and not ax.is_unique: - result = result.take(result.index.get_indexer_for( - ax.values).unique(), axis=self.axis) + indexer = algorithms.unique1d( + result.index.get_indexer_for(ax.values)) + result = result.take(indexer, axis=self.axis) else: result = result.reindex_axis(ax, axis=self.axis) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 695f9f119baa2..8a4878d9cfbcf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2256,8 +2256,8 @@ def intersection(self, other): indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates - indexer = Index(other._values).get_indexer_non_unique( - self._values)[0].unique() + indexer = algos.unique1d( + Index(other._values).get_indexer_non_unique(self._values)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) @@ 
-2704,7 +2704,7 @@ def get_indexer_non_unique(self, target): tgt_values = target._values indexer, missing = self._engine.get_indexer_non_unique(tgt_values) - return Index(indexer), missing + return indexer, missing def get_indexer_for(self, target, **kwargs): """ @@ -2942,7 +2942,6 @@ def _reindex_non_unique(self, target): else: # need to retake to have the same size as the indexer - indexer = indexer.values indexer[~check] = 0 # reset the new indexer to account for the new size diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 18dbe6624008a..7a81a125467d5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1131,6 +1131,17 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + def test_get_indexer_consistency(self): + # See GH 16819 + for name, index in self.indices.items(): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 4e4f9b29f9a4c..493274fff43e0 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -386,8 +386,7 @@ def test_reindexing(self): expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal( - expected.values, actual, check_dtype=False) + tm.assert_numpy_array_equal(expected, actual) def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a'])