From 724610ff559428c941c0425ab0264413c69435dc Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Sun, 16 Sep 2018 20:48:29 -0400 Subject: [PATCH 1/5] DOC: Fix Series nsmallest and nlargest docstring/doctests --- ci/doctests.sh | 2 +- pandas/core/series.py | 157 ++++++++++++++++++++++++++++++------------ 2 files changed, 115 insertions(+), 44 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index 2af5dbd26aeb1..2b5edc5aa1172 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -28,7 +28,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/series.py \ - -k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict" + -k"-nonzero -reindex -searchsorted -to_dict" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/series.py b/pandas/core/series.py index a4d403e4bcd94..571ab059a4d04 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2743,17 +2743,20 @@ def nlargest(self, n=5, keep='first'): Parameters ---------- - n : int - Return this many descending sorted values - keep : {'first', 'last'}, default 'first' - Where there are duplicate values: - - ``first`` : take the first occurrence. - - ``last`` : take the last occurrence. + n : int, default 5 + Return this many descending sorted values. + keep : str, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + - ``first`` : take the first occurrences based on the index order + - ``last`` : take the last occurrences based on the index order + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. Returns ------- - top_n : Series - The n largest values in the Series, in sorted order + Series + The n largest values in the Series, sorted in decreasing order. Notes ----- @@ -2762,23 +2765,56 @@ def nlargest(self, n=5, keep='first'): See Also -------- - Series.nsmallest + Series.nsmallest: Get the `n` smallest elements. 
Examples -------- - >>> s = pd.Series(np.random.randn(10**6)) - >>> s.nlargest(10) # only sorts up to the N requested - 219921 4.644710 - 82124 4.608745 - 421689 4.564644 - 425277 4.447014 - 718691 4.414137 - 43154 4.403520 - 283187 4.313922 - 595519 4.273635 - 503969 4.250236 - 121637 4.240952 - dtype: float64 + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Monserat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Monserat 5200 + dtype: int64 + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 """ return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() @@ -2789,16 +2825,19 @@ def nsmallest(self, n=5, keep='first'): Parameters ---------- n : int - Return this many ascending sorted values - keep : {'first', 'last'}, default 'first' - Where there are duplicate values: - - ``first`` : take the first occurrence. - - ``last`` : take the last occurrence. + Return this many ascending sorted values. + keep : str, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + - ``first`` : take the first occurrences based on the index order + - ``last`` : take the last occurrences based on the index order + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. 
Returns ------- - bottom_n : Series - The n smallest values in the Series, in sorted order + Series + The n smallest values in the Series, sorted in increasing order. Notes ----- @@ -2807,23 +2846,55 @@ def nsmallest(self, n=5, keep='first'): See Also -------- - Series.nlargest + Series.nlargest: Get the `n` largest elements. Examples -------- - >>> s = pd.Series(np.random.randn(10**6)) - >>> s.nsmallest(10) # only sorts up to the N requested - 288532 -4.954580 - 732345 -4.835960 - 64803 -4.812550 - 446457 -4.609998 - 501225 -4.483945 - 669476 -4.472935 - 973615 -4.401699 - 621279 -4.355126 - 773916 -4.347355 - 359919 -4.331927 - dtype: float64 + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Brunei": 434000, "Malta": 434000, + ... "Maldives": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Monserat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Monserat 5200 + dtype: int64 + + >>> s.nsmallest() + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337780 + dtype: int64 + + >>> s.nsmallest(3) + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: int64 + + >>> s.nsmallest(3, keep='last') + Monserat 5200 + Anguilla 11300 + Tuvalu 11300 + dtype: int64 + + >>> s.nsmallest(3, keep='all') + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 """ return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() From 1af1280853b443d1fd56bfbe80b27d26ee453302 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 17 Sep 2018 07:44:37 -0400 Subject: [PATCH 2/5] Fix a typo in nsmallest doctest --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 571ab059a4d04..069c91a6aab6a 100644 --- a/pandas/core/series.py +++ 
b/pandas/core/series.py @@ -2874,7 +2874,7 @@ def nsmallest(self, n=5, keep='first'): Nauru 11300 Tuvalu 11300 Anguilla 11300 - Iceland 337780 + Iceland 337000 dtype: int64 >>> s.nsmallest(3) From 5c881f9419c917e6b6a3dd177776ab6324b6e2c5 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 17 Sep 2018 15:48:54 -0400 Subject: [PATCH 3/5] Add quick descriptions in the doctests of Series.nlargest and Series.nsmallest --- pandas/core/series.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 069c91a6aab6a..05eeb2fd55392 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2788,6 +2788,8 @@ def nlargest(self, n=5, keep='first'): Monserat 5200 dtype: int64 + The n largest elements where n=5 by default. + >>> s.nlargest() France 65000000 Italy 59000000 @@ -2796,18 +2798,29 @@ def nlargest(self, n=5, keep='first'): Brunei 434000 dtype: int64 + The n largest elements where n=3. Default keep value is 'first' so + Malta will + be kept. + >>> s.nlargest(3) France 65000000 Italy 59000000 Malta 434000 dtype: int64 + The n largest elements where n=3 and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + >>> s.nlargest(3, keep='last') France 65000000 Italy 59000000 Brunei 434000 dtype: int64 + The n largest elements where n=3 with all duplicates kept. Note that the + returned Series has five elements due to the three duplicates. + >>> s.nlargest(3, keep='all') France 65000000 Italy 59000000 @@ -2869,6 +2882,8 @@ def nsmallest(self, n=5, keep='first'): Monserat 5200 dtype: int64 + The n largest elements where n=5 by default. + >>> s.nsmallest() Monserat 5200 Nauru 11300 @@ -2877,18 +2892,28 @@ def nsmallest(self, n=5, keep='first'): Iceland 337000 dtype: int64 + The n smallest elements where n=3. Default keep value is 'first' so + Nauru and Tuvalu will be kept. 
+ >>> s.nsmallest(3) Monserat 5200 Nauru 11300 Tuvalu 11300 dtype: int64 + The n smallest elements where n=3 and keeping the last duplicates. + Anguilla and Tuvalu will be kept since they are the last with value + 11300 based on the index order. + >>> s.nsmallest(3, keep='last') Monserat 5200 Anguilla 11300 Tuvalu 11300 dtype: int64 + The n smallest elements where n=3 with all duplicates kept. Note + that the returned Series has four elements due to the three duplicates. + >>> s.nsmallest(3, keep='all') Monserat 5200 Nauru 11300 From 5d6d5ed08a6ec75169ecd5df8dba360a6fe41264 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 17 Sep 2018 21:46:43 -0400 Subject: [PATCH 4/5] Update nlargest and nsmallest docstring with backticks --- pandas/core/series.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 05eeb2fd55392..3926580d32fc4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2756,7 +2756,7 @@ def nlargest(self, n=5, keep='first'): Returns ------- Series - The n largest values in the Series, sorted in decreasing order. + The `n` largest values in the Series, sorted in decreasing order. Notes ----- @@ -2788,7 +2788,7 @@ def nlargest(self, n=5, keep='first'): Monserat 5200 dtype: int64 - The n largest elements where n=5 by default. + The `n` largest elements where ``n=5`` by default. >>> s.nlargest() France 65000000 @@ -2798,9 +2798,8 @@ def nlargest(self, n=5, keep='first'): Brunei 434000 dtype: int64 - The n largest elements where n=3. Default keep value is 'first' so - Malta will - be kept. + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. >>> s.nlargest(3) France 65000000 @@ -2808,7 +2807,7 @@ def nlargest(self, n=5, keep='first'): Malta 434000 dtype: int64 - The n largest elements where n=3 and keeping the last duplicates. 
+ The `n` largest elements where ``n=3`` and keeping the last duplicates. Brunei will be kept since it is the last with value 434000 based on the index order. @@ -2818,8 +2817,8 @@ def nlargest(self, n=5, keep='first'): Brunei 434000 dtype: int64 - The n largest elements where n=3 with all duplicates kept. Note that the - returned Series has five elements due to the three duplicates. + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. >>> s.nlargest(3, keep='all') France 65000000 @@ -2850,7 +2849,7 @@ def nsmallest(self, n=5, keep='first'): Returns ------- Series - The n smallest values in the Series, sorted in increasing order. + The `n` smallest values in the Series, sorted in increasing order. Notes ----- @@ -2882,7 +2881,7 @@ def nsmallest(self, n=5, keep='first'): Monserat 5200 dtype: int64 - The n largest elements where n=5 by default. + The `n` smallest elements where ``n=5`` by default. >>> s.nsmallest() Monserat 5200 @@ -2892,8 +2891,8 @@ def nsmallest(self, n=5, keep='first'): Iceland 337000 dtype: int64 - The n smallest elements where n=3. Default keep value is 'first' so - Nauru and Tuvalu will be kept. + The `n` smallest elements where ``n=3``. Default `keep` value is + 'first' so Nauru and Tuvalu will be kept. >>> s.nsmallest(3) Monserat 5200 @@ -2901,9 +2900,9 @@ def nsmallest(self, n=5, keep='first'): Tuvalu 11300 dtype: int64 - The n smallest elements where n=3 and keeping the last duplicates. - Anguilla and Tuvalu will be kept since they are the last with value - 11300 based on the index order. + The `n` smallest elements where ``n=3`` and keeping the last + duplicates. Anguilla and Tuvalu will be kept since they are the last + with value 11300 based on the index order.
>>> s.nsmallest(3, keep='last') Monserat 5200 @@ -2911,7 +2910,7 @@ def nsmallest(self, n=5, keep='first'): Tuvalu 11300 dtype: int64 - The n smallest elements where n=3 with all duplicates kept. Note + The `n` smallest elements where ``n=3`` with all duplicates kept. Note that the returned Series has four elements due to the three duplicates. >>> s.nsmallest(3, keep='all') From 7f311f9e7176475d6940074475804117d1982639 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Tue, 18 Sep 2018 07:57:51 -0400 Subject: [PATCH 5/5] Various changes to nlargest and nsmallest based on datapythonista review --- pandas/core/series.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3926580d32fc4..8ce58ed6f0554 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2745,7 +2745,7 @@ def nlargest(self, n=5, keep='first'): ---------- n : int, default 5 Return this many descending sorted values. - keep : str, default 'first' + keep : {'first', 'last', 'all'}, default 'first' When there are duplicate values that cannot all fit in a Series of `n` elements: - ``first`` : take the first occurrences based on the index order @@ -2766,6 +2766,8 @@ def nlargest(self, n=5, keep='first'): See Also -------- Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. Examples -------- @@ -2836,9 +2838,9 @@ def nsmallest(self, n=5, keep='first'): Parameters ---------- - n : int + n : int, default 5 Return this many ascending sorted values. - keep : str, default 'first' + keep : {'first', 'last', 'all'}, default 'first' When there are duplicate values that cannot all fit in a Series of `n` elements: - ``first`` : take the first occurrences based on the index order @@ -2859,6 +2861,8 @@ def nsmallest(self, n=5, keep='first'): See Also -------- Series.nlargest: Get the `n` largest elements. + Series.sort_values: Sort Series by values. 
+ Series.head: Return the first `n` rows. Examples --------