From 724610ff559428c941c0425ab0264413c69435dc Mon Sep 17 00:00:00 2001
From: Thierry Moisan <thierry.moisan@jda.com>
Date: Sun, 16 Sep 2018 20:48:29 -0400
Subject: [PATCH 1/5] DOC: Fix Series nsmallest and nlargest docstring/doctests

---
 ci/doctests.sh        |   2 +-
 pandas/core/series.py | 157 ++++++++++++++++++++++++++++++------------
 2 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/ci/doctests.sh b/ci/doctests.sh
index 2af5dbd26aeb1..2b5edc5aa1172 100755
--- a/ci/doctests.sh
+++ b/ci/doctests.sh
@@ -28,7 +28,7 @@ if [ "$DOCTEST" ]; then
     fi
 
     pytest --doctest-modules -v pandas/core/series.py \
-        -k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict"
+        -k"-nonzero -reindex -searchsorted -to_dict"
 
     if [ $? -ne "0" ]; then
         RET=1
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a4d403e4bcd94..571ab059a4d04 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2743,17 +2743,20 @@ def nlargest(self, n=5, keep='first'):
 
         Parameters
         ----------
-        n : int
-            Return this many descending sorted values
-        keep : {'first', 'last'}, default 'first'
-            Where there are duplicate values:
-            - ``first`` : take the first occurrence.
-            - ``last`` : take the last occurrence.
+        n : int, default 5
+            Return this many descending sorted values.
+        keep : str, default 'first'
+            When there are duplicate values that cannot all fit in a
+            Series of `n` elements:
+            - ``first`` : take the first occurrences based on the index order
+            - ``last`` : take the last occurrences based on the index order
+            - ``all`` : keep all occurrences. This can result in a Series of
+                size larger than `n`.
 
         Returns
         -------
-        top_n : Series
-            The n largest values in the Series, in sorted order
+        Series
+            The n largest values in the Series, sorted in decreasing order.
 
         Notes
         -----
@@ -2762,23 +2765,56 @@ def nlargest(self, n=5, keep='first'):
 
         See Also
         --------
-        Series.nsmallest
+        Series.nsmallest: Get the `n` smallest elements.
 
         Examples
         --------
-        >>> s = pd.Series(np.random.randn(10**6))
-        >>> s.nlargest(10)  # only sorts up to the N requested
-        219921    4.644710
-        82124     4.608745
-        421689    4.564644
-        425277    4.447014
-        718691    4.414137
-        43154     4.403520
-        283187    4.313922
-        595519    4.273635
-        503969    4.250236
-        121637    4.240952
-        dtype: float64
+        >>> countries_population = {"Italy": 59000000, "France": 65000000,
+        ...                         "Malta": 434000, "Maldives": 434000,
+        ...                         "Brunei": 434000, "Iceland": 337000,
+        ...                         "Nauru": 11300, "Tuvalu": 11300,
+        ...                         "Anguilla": 11300, "Monserat": 5200}
+        >>> s = pd.Series(countries_population)
+        >>> s
+        Italy       59000000
+        France      65000000
+        Malta         434000
+        Maldives      434000
+        Brunei        434000
+        Iceland       337000
+        Nauru          11300
+        Tuvalu         11300
+        Anguilla       11300
+        Monserat        5200
+        dtype: int64
+
+        >>> s.nlargest()
+        France      65000000
+        Italy       59000000
+        Malta         434000
+        Maldives      434000
+        Brunei        434000
+        dtype: int64
+
+        >>> s.nlargest(3)
+        France    65000000
+        Italy     59000000
+        Malta       434000
+        dtype: int64
+
+        >>> s.nlargest(3, keep='last')
+        France      65000000
+        Italy       59000000
+        Brunei        434000
+        dtype: int64
+
+        >>> s.nlargest(3, keep='all')
+        France      65000000
+        Italy       59000000
+        Malta         434000
+        Maldives      434000
+        Brunei        434000
+        dtype: int64
         """
         return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
 
@@ -2789,16 +2825,19 @@ def nsmallest(self, n=5, keep='first'):
         Parameters
         ----------
         n : int
-            Return this many ascending sorted values
-        keep : {'first', 'last'}, default 'first'
-            Where there are duplicate values:
-            - ``first`` : take the first occurrence.
-            - ``last`` : take the last occurrence.
+            Return this many ascending sorted values.
+        keep : str, default 'first'
+            When there are duplicate values that cannot all fit in a
+            Series of `n` elements:
+            - ``first`` : take the first occurrences based on the index order
+            - ``last`` : take the last occurrences based on the index order
+            - ``all`` : keep all occurrences. This can result in a Series of
+                size larger than `n`.
 
         Returns
         -------
-        bottom_n : Series
-            The n smallest values in the Series, in sorted order
+        Series
+            The n smallest values in the Series, sorted in increasing order.
 
         Notes
         -----
@@ -2807,23 +2846,55 @@ def nsmallest(self, n=5, keep='first'):
 
         See Also
         --------
-        Series.nlargest
+        Series.nlargest: Get the `n` largest elements.
 
         Examples
         --------
-        >>> s = pd.Series(np.random.randn(10**6))
-        >>> s.nsmallest(10)  # only sorts up to the N requested
-        288532   -4.954580
-        732345   -4.835960
-        64803    -4.812550
-        446457   -4.609998
-        501225   -4.483945
-        669476   -4.472935
-        973615   -4.401699
-        621279   -4.355126
-        773916   -4.347355
-        359919   -4.331927
-        dtype: float64
+        >>> countries_population = {"Italy": 59000000, "France": 65000000,
+        ...                         "Brunei": 434000, "Malta": 434000,
+        ...                         "Maldives": 434000, "Iceland": 337000,
+        ...                         "Nauru": 11300, "Tuvalu": 11300,
+        ...                         "Anguilla": 11300, "Monserat": 5200}
+        >>> s = pd.Series(countries_population)
+        >>> s
+        Italy       59000000
+        France      65000000
+        Brunei        434000
+        Malta         434000
+        Maldives      434000
+        Iceland       337000
+        Nauru          11300
+        Tuvalu         11300
+        Anguilla       11300
+        Monserat        5200
+        dtype: int64
+
+        >>> s.nsmallest()
+        Monserat      5200
+        Nauru        11300
+        Tuvalu       11300
+        Anguilla     11300
+        Iceland     337780
+        dtype: int64
+
+        >>> s.nsmallest(3)
+        Monserat     5200
+        Nauru       11300
+        Tuvalu      11300
+        dtype: int64
+
+        >>> s.nsmallest(3, keep='last')
+        Monserat     5200
+        Anguilla    11300
+        Tuvalu      11300
+        dtype: int64
+
+        >>> s.nsmallest(3, keep='all')
+        Monserat     5200
+        Nauru       11300
+        Tuvalu      11300
+        Anguilla    11300
+        dtype: int64
         """
         return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
 

From 1af1280853b443d1fd56bfbe80b27d26ee453302 Mon Sep 17 00:00:00 2001
From: Thierry Moisan <thierry.moisan@jda.com>
Date: Mon, 17 Sep 2018 07:44:37 -0400
Subject: [PATCH 2/5] Fix a typo in nsmallest doctest

---
 pandas/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 571ab059a4d04..069c91a6aab6a 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2874,7 +2874,7 @@ def nsmallest(self, n=5, keep='first'):
         Nauru        11300
         Tuvalu       11300
         Anguilla     11300
-        Iceland     337780
+        Iceland     337000
         dtype: int64
 
         >>> s.nsmallest(3)

From 5c881f9419c917e6b6a3dd177776ab6324b6e2c5 Mon Sep 17 00:00:00 2001
From: Thierry Moisan <thierry.moisan@jda.com>
Date: Mon, 17 Sep 2018 15:48:54 -0400
Subject: [PATCH 3/5] Add quick descriptions in the doctests of Series.nlargest
 and Series.nsmallest

---
 pandas/core/series.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 069c91a6aab6a..05eeb2fd55392 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2788,6 +2788,8 @@ def nlargest(self, n=5, keep='first'):
         Monserat        5200
         dtype: int64
 
+        The n largest elements where n=5 by default.
+
         >>> s.nlargest()
         France      65000000
         Italy       59000000
@@ -2796,18 +2798,29 @@ def nlargest(self, n=5, keep='first'):
         Brunei        434000
         dtype: int64
 
+        The n largest elements where n=3. Default keep value is 'first' so
+        Malta will
+        be kept.
+
         >>> s.nlargest(3)
         France    65000000
         Italy     59000000
         Malta       434000
         dtype: int64
 
+        The n largest elements where n=3 and keeping the last duplicates.
+        Brunei will be kept since it is the last with value 434000 based on
+        the index order.
+
         >>> s.nlargest(3, keep='last')
         France      65000000
         Italy       59000000
         Brunei        434000
         dtype: int64
 
+        The n largest elements where n=3 with all duplicates kept. Note that the
+        returned Series has five elements due to the three duplicates.
+
         >>> s.nlargest(3, keep='all')
         France      65000000
         Italy       59000000
@@ -2869,6 +2882,8 @@ def nsmallest(self, n=5, keep='first'):
         Monserat        5200
         dtype: int64
 
+        The n largest elements where n=5 by default.
+
         >>> s.nsmallest()
         Monserat      5200
         Nauru        11300
@@ -2877,18 +2892,28 @@ def nsmallest(self, n=5, keep='first'):
         Iceland     337000
         dtype: int64
 
+        The n smallest elements where n=3. Default keep value is 'first' so
+        Nauru and Tuvalu will be kept.
+
         >>> s.nsmallest(3)
         Monserat     5200
         Nauru       11300
         Tuvalu      11300
         dtype: int64
 
+        The n smallest elements where n=3 and keeping the last duplicates.
+        Anguilla and Tuvalu will be kept since they are the last with value
+        11300 based on the index order.
+
         >>> s.nsmallest(3, keep='last')
         Monserat     5200
         Anguilla    11300
         Tuvalu      11300
         dtype: int64
 
+        The n smallest elements where n=3 with all duplicates kept. Note
+        that the returned Series has four elements due to the three duplicates.
+
         >>> s.nsmallest(3, keep='all')
         Monserat     5200
         Nauru       11300

From 5d6d5ed08a6ec75169ecd5df8dba360a6fe41264 Mon Sep 17 00:00:00 2001
From: Thierry Moisan <thierry.moisan@jda.com>
Date: Mon, 17 Sep 2018 21:46:43 -0400
Subject: [PATCH 4/5] Update nlargest and nsmallest docstring with backticks

---
 pandas/core/series.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 05eeb2fd55392..3926580d32fc4 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2756,7 +2756,7 @@ def nlargest(self, n=5, keep='first'):
         Returns
         -------
         Series
-            The n largest values in the Series, sorted in decreasing order.
+            The `n` largest values in the Series, sorted in decreasing order.
 
         Notes
         -----
@@ -2788,7 +2788,7 @@ def nlargest(self, n=5, keep='first'):
         Monserat        5200
         dtype: int64
 
-        The n largest elements where n=5 by default.
+        The `n` largest elements where ``n=5`` by default.
 
         >>> s.nlargest()
         France      65000000
@@ -2798,9 +2798,8 @@ def nlargest(self, n=5, keep='first'):
         Brunei        434000
         dtype: int64
 
-        The n largest elements where n=3. Default keep value is 'first' so
-        Malta will
-        be kept.
+        The `n` largest elements where ``n=3``. Default `keep` value is 'first'
+        so Malta will be kept.
 
         >>> s.nlargest(3)
         France    65000000
@@ -2808,7 +2807,7 @@ def nlargest(self, n=5, keep='first'):
         Malta       434000
         dtype: int64
 
-        The n largest elements where n=3 and keeping the last duplicates.
+        The `n` largest elements where ``n=3`` and keeping the last duplicates.
         Brunei will be kept since it is the last with value 434000 based on
         the index order.
 
@@ -2818,8 +2817,8 @@ def nlargest(self, n=5, keep='first'):
         Brunei        434000
         dtype: int64
 
-        The n largest elements where n=3 with all duplicates kept. Note that the
-        returned Series has five elements due to the three duplicates.
+        The `n` largest elements where ``n=3`` with all duplicates kept. Note
+        that the returned Series has five elements due to the three duplicates.
 
         >>> s.nlargest(3, keep='all')
         France      65000000
@@ -2850,7 +2849,7 @@ def nsmallest(self, n=5, keep='first'):
         Returns
         -------
         Series
-            The n smallest values in the Series, sorted in increasing order.
+            The `n` smallest values in the Series, sorted in increasing order.
 
         Notes
         -----
@@ -2882,7 +2881,7 @@ def nsmallest(self, n=5, keep='first'):
         Monserat        5200
         dtype: int64
 
-        The n largest elements where n=5 by default.
+        The `n` smallest elements where ``n=5`` by default.
 
         >>> s.nsmallest()
         Monserat      5200
@@ -2892,8 +2891,8 @@ def nsmallest(self, n=5, keep='first'):
         Iceland     337000
         dtype: int64
 
-        The n smallest elements where n=3. Default keep value is 'first' so
-        Nauru and Tuvalu will be kept.
+        The `n` smallest elements where ``n=3``. Default `keep` value is
+        'first' so Nauru and Tuvalu will be kept.
 
         >>> s.nsmallest(3)
         Monserat     5200
@@ -2901,9 +2900,9 @@ def nsmallest(self, n=5, keep='first'):
         Tuvalu      11300
         dtype: int64
 
-        The n smallest elements where n=3 and keeping the last duplicates.
-        Anguilla and Tuvalu will be kept since they are the last with value
-        11300 based on the index order.
+        The `n` smallest elements where ``n=3`` and keeping the last
+        duplicates. Anguilla and Tuvalu will be kept since they are the last
+        with value 11300 based on the index order.
 
         >>> s.nsmallest(3, keep='last')
         Monserat     5200
@@ -2911,7 +2910,7 @@ def nsmallest(self, n=5, keep='first'):
         Tuvalu      11300
         dtype: int64
 
-        The n smallest elements where n=3 with all duplicates kept. Note
+        The `n` smallest elements where ``n=3`` with all duplicates kept. Note
         that the returned Series has four elements due to the three duplicates.
 
         >>> s.nsmallest(3, keep='all')

From 7f311f9e7176475d6940074475804117d1982639 Mon Sep 17 00:00:00 2001
From: Thierry Moisan <thierry.moisan@jda.com>
Date: Tue, 18 Sep 2018 07:57:51 -0400
Subject: [PATCH 5/5] Various changes to nlargest and nsmallest based on
 datapythonista review

---
 pandas/core/series.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 3926580d32fc4..8ce58ed6f0554 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2745,7 +2745,7 @@ def nlargest(self, n=5, keep='first'):
         ----------
         n : int, default 5
             Return this many descending sorted values.
-        keep : str, default 'first'
+        keep : {'first', 'last', 'all'}, default 'first'
             When there are duplicate values that cannot all fit in a
             Series of `n` elements:
             - ``first`` : take the first occurrences based on the index order
@@ -2766,6 +2766,8 @@ def nlargest(self, n=5, keep='first'):
         See Also
         --------
         Series.nsmallest: Get the `n` smallest elements.
+        Series.sort_values: Sort Series by values.
+        Series.head: Return the first `n` rows.
 
         Examples
         --------
@@ -2836,9 +2838,9 @@ def nsmallest(self, n=5, keep='first'):
 
         Parameters
         ----------
-        n : int
+        n : int, default 5
             Return this many ascending sorted values.
-        keep : str, default 'first'
+        keep : {'first', 'last', 'all'}, default 'first'
             When there are duplicate values that cannot all fit in a
             Series of `n` elements:
             - ``first`` : take the first occurrences based on the index order
@@ -2859,6 +2861,8 @@ def nsmallest(self, n=5, keep='first'):
         See Also
         --------
         Series.nlargest: Get the `n` largest elements.
+        Series.sort_values: Sort Series by values.
+        Series.head: Return the first `n` rows.
 
         Examples
         --------