Skip to content

Commit

Permalink
API: add "level=" argument to MultiIndex.unique()
Browse files Browse the repository at this point in the history
closes #17896
  • Loading branch information
toobaz committed Nov 19, 2017
1 parent b00e62c commit 50f199d
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 17 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Other Enhancements

- Better support for :func:`DataFrame.style.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`)
- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. (:issue:`18171`)
- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`)
- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`)
- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`)

Expand Down
28 changes: 26 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3757,8 +3757,32 @@ def drop(self, labels, errors='raise'):
indexer = indexer[~mask]
return self.delete(indexer)

@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
def unique(self):
# Shared docstring for Index.unique and subclasses; substituted with
# per-class values via the ``% _index_doc_kwargs`` formatting at the
# @Appender sites. Blank lines are required by the numpydoc standard to
# delimit the Parameters / Returns / See Also sections — without them the
# rendered API docs collapse into a single paragraph.
_index_shared_docs['index_unique'] = (
    """
    Return unique values in the index. Uniques are returned in order
    of appearance, this does NOT sort.

    Parameters
    ----------
    level : int or str, optional, default None
        Only return values from specified level (for MultiIndex)

        .. versionadded:: 0.22.0

    Returns
    -------
    Index without duplicates

    See Also
    --------
    unique
    Series.unique
    """)

@Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs)
def unique(self, level=None):
    # A flat Index only has level 0 (or its own name); reject anything
    # else up front so MultiIndex-style calls fail loudly.
    if level is not None:
        self._validate_index_level(level)
    # Delegate deduplication to IndexOpsMixin, then re-wrap the raw
    # result as an Index of the same kind.
    return self._shallow_copy(super(Index, self).unique())

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,10 @@ def is_monotonic_increasing(self):
def is_monotonic_decreasing(self):
return Index(self.codes).is_monotonic_decreasing

@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
def unique(self):
@Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs)
def unique(self, level=None):
if level not in {0, self.name, None}:
raise ValueError("Level {} not found".format(level))
result = base.IndexOpsMixin.unique(self)
# CategoricalIndex._shallow_copy uses keeps original categories
# and ordered if not otherwise specified
Expand Down
23 changes: 18 additions & 5 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,7 @@ def _try_mi(k):

raise InvalidIndexError(key)

def _get_level_values(self, level):
def _get_level_values(self, level, unique=False):
    """
    Return vector of label values for requested level,
    equal to the length of the index (unless ``unique=True``).

    This method is used only if the level is numeric.

    Parameters
    ----------
    level : int level
    unique : bool, default False
        if True, drop duplicated values

    Returns
    -------
    values : Index
    """
    values = self.levels[level]
    labels = self.labels[level]
    if unique:
        # Deduplicate the integer codes (first-appearance order) before
        # materializing, so we never build the full-length vector.
        labels = algos.unique(labels)
    # Map codes back onto the level's values; code -1 (missing) is
    # filled with the level's NA value.
    filled = algos.take_1d(values._values, labels,
                           fill_value=values._na_value)
    values = values._shallow_copy(filled)
    return values

def get_level_values(self, level):
Expand Down Expand Up @@ -967,6 +971,15 @@ def get_level_values(self, level):
values = self._get_level_values(level)
return values

@Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs)
def unique(self, level=None):

    # Without a level, deduplicate whole tuples via the base class.
    if level is None:
        return super(MultiIndex, self).unique()
    # Otherwise resolve the level (accepts a name or a position) and
    # return that single level's unique values.
    num = self._get_level_number(level)
    return self._get_level_values(level=num, unique=True)

def format(self, space=2, sparsify=None, adjoin=True, names=False,
na_rep=None, formatter=None):
if len(self) == 0:
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,27 @@ def test_duplicates(self, indices):
assert not idx.is_unique
assert idx.has_duplicates

def test_unique(self, indices):
    # MultiIndex has its own dedicated tests; CategoricalIndex is
    # excluded because its categories change on unique (GH 18291).
    if isinstance(indices, (MultiIndex, CategoricalIndex)):
        return

    # GH 17896: level=0 / level=<name> / level=None are all equivalent
    # for a flat index and match drop_duplicates().
    expected = indices.drop_duplicates()
    for level in (0, indices.name, None):
        tm.assert_index_equal(indices.unique(level=level), expected)

    # Invalid levels must raise.
    for bad in (3, 'wrong'):
        pytest.raises((IndexError, KeyError), indices.unique, level=bad)

def test_unique_na(self):
idx = pd.Index([2, np.nan, 2, 1], name='my_index')
expected = pd.Index([2, np.nan, 1], name='my_index')
result = idx.unique()
tm.assert_index_equal(result, expected)

def test_get_unique_index(self, indices):
# MultiIndex tested separately
if not len(indices) or isinstance(indices, MultiIndex):
Expand Down
32 changes: 24 additions & 8 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,19 +963,21 @@ def test_get_level_values(self):
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
tm.assert_index_equal(index.get_level_values(1), exp)

def test_get_level_values_na(self):
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
def test_get_level_values_int_with_na(self):
    # An integer level containing NaN should come back as a float Index.
    cases = [
        ([1, np.nan, 2], Index([1, np.nan, 2])),
        ([np.nan, np.nan, 2], Index([np.nan, np.nan, 2])),
    ]
    for level_values, expected in cases:
        index = pd.MultiIndex.from_arrays([['a', 'b', 'b'], level_values])
        result = index.get_level_values(1)
        tm.assert_index_equal(result, expected)

def test_get_level_values_na(self):
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
Expand All @@ -990,7 +992,7 @@ def test_get_level_values_na(self):
index = pd.MultiIndex.from_arrays(arrays)
values = index.get_level_values(1)
expected = pd.DatetimeIndex([0, 1, pd.NaT])
tm.assert_numpy_array_equal(values.values, expected.values)
tm.assert_index_equal(values, expected)

arrays = [[], []]
index = pd.MultiIndex.from_arrays(arrays)
Expand Down Expand Up @@ -2277,6 +2279,20 @@ def test_unique(self):
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
tm.assert_index_equal(res, exp)

@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
def test_unique_level(self, level):
    # GH #17896 - with level= argument
    expected = self.index.get_level_values(level).unique()
    tm.assert_index_equal(self.index.unique(level=level), expected)

    # A level that is already unique must round-trip unchanged.
    mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
                                   names=['first', 'second'])
    tm.assert_index_equal(mi.unique(level=level),
                          mi.get_level_values(level))

def test_unique_datetimelike(self):
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
'2015-01-01', 'NaT', 'NaT'])
Expand Down

0 comments on commit 50f199d

Please sign in to comment.