From e5b5922cdb5a5ecbcdef746b197d365722f3b5eb Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 16:41:58 +0100 Subject: [PATCH] DOC: update the pandas.Index.drop_duplicates and pandas.Series.drop_duplicates docstring (#20114) --- pandas/core/base.py | 18 ---------- pandas/core/indexes/base.py | 46 +++++++++++++++++++++++- pandas/core/series.py | 71 ++++++++++++++++++++++++++++++++++++- 3 files changed, 115 insertions(+), 20 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 280b8849792e37..fd039480fc6f17 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1184,24 +1184,6 @@ def searchsorted(self, value, side='left', sorter=None): # needs coercion on the key (DatetimeIndex does already) return self.values.searchsorted(value, side=side, sorter=sorter) - _shared_docs['drop_duplicates'] = ( - """Return %(klass)s with duplicate values removed - - Parameters - ---------- - - keep : {'first', 'last', False}, default 'first' - - ``first`` : Drop duplicates except for the first occurrence. - - ``last`` : Drop duplicates except for the last occurrence. - - False : Drop all duplicates. - %(inplace)s - - Returns - ------- - deduplicated : %(klass)s - """) - - @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 52283e4e223b49..888507a5296875 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4017,8 +4017,52 @@ def unique(self, level=None): result = super(Index, self).unique() return self._shallow_copy(result) - @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, keep='first'): + """ + Return Index with duplicate values removed. 
+ + Parameters + ---------- + keep : {'first', 'last', ``False``}, default 'first' + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + + Returns + ------- + deduplicated : Index + + See Also + -------- + Series.drop_duplicates : equivalent method on Series + DataFrame.drop_duplicates : equivalent method on DataFrame + Index.duplicated : related method on Index, indicating duplicate + Index values. + + Examples + -------- + Generate a pandas.Index with duplicate values. + + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) + + The `keep` parameter controls which duplicate values are removed. + The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> idx.drop_duplicates(keep='first') + Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object') + + The value 'last' keeps the last occurrence for each set of duplicated + entries. + + >>> idx.drop_duplicates(keep='last') + Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object') + + The value ``False`` discards all sets of duplicated entries. + + >>> idx.drop_duplicates(keep=False) + Index(['cow', 'beetle', 'hippo'], dtype='object') + """ return super(Index, self).drop_duplicates(keep=keep) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) diff --git a/pandas/core/series.py b/pandas/core/series.py index 069f0372ab6e1a..090f599c860ae2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1316,8 +1316,77 @@ def unique(self): return result - @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): + """ + Return Series with duplicate values removed. + + Parameters + ---------- + keep : {'first', 'last', ``False``}, default 'first' + - 'first' : Drop duplicates except for the first occurrence. 
+ - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + inplace : boolean, default ``False`` + If ``True``, performs operation inplace and returns None. + + Returns + ------- + deduplicated : Series + + See Also + -------- + Index.drop_duplicates : equivalent method on Index + DataFrame.drop_duplicates : equivalent method on DataFrame + Series.duplicated : related method on Series, indicating duplicate + Series values. + + Examples + -------- + Generate a Series with duplicated entries. + + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + ... name='animal') + >>> s + 0 lama + 1 cow + 2 lama + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> s.drop_duplicates() + 0 lama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + + The value 'last' for parameter 'keep' keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + The value ``False`` for parameter 'keep' discards all sets of + duplicated entries. Setting the value of 'inplace' to ``True`` performs + the operation inplace and returns ``None``. + + >>> s.drop_duplicates(keep=False, inplace=True) + >>> s + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + """ return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs)