Skip to content

Commit

Permalink
TST: Clean up tests of DataFrame.sort_{index,values} (#13496)
Browse files Browse the repository at this point in the history
* TST: Clean up tests of DataFrame.sort_{index,values}

* Factor out Series sorting tests to own file.

* Delegate deprecated sort() and order() to their own tests.

Before this commit, the `Series.sort_values()` tests relied on the deprecated
`Series.sort()` and `Series.order()` as the source of truth. However,
they both merely called `Series.sort_values()` under the hood.

This commit consolidates the core test logic against `.sort_values()`
directly, while `.sort()` and `.order()` merely check for equivalence
with `.sort_values()`.

Also removes some no-op assertions that had rotted from the old days of
`sort()`/`order()`.

* Remove 'by' docstring from Series.sort_values

* Document defaults for optional sorting args

* Move more sort_values, sort_index tests to be together.

* Add test for Series.sort_index(sort_remaining=True)

* Improve `sort_values` tests for multiple `by` columns

Duplicate values in the test DataFrame are necessary
to fully test this feature.

* PEP8 cleanup

* Annotate tests with GH issue

* Fix indentation - docstring string replacement
  • Loading branch information
IamJeffG authored and jorisvandenbossche committed Jul 11, 2016
1 parent 2f7fdd0 commit 65849d3
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 215 deletions.
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@
# ---------------------------------------------------------------------
# Docstring templates

_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame',
axes_single_arg="{0, 1, 'index', 'columns'}")
_shared_doc_kwargs = dict(
axes='index, columns', klass='DataFrame',
axes_single_arg="{0, 1, 'index', 'columns'}",
optional_by="""
by : str or list of str
Name or list of names which refer to the axis items.""")

_numeric_only_doc = """numeric_only : boolean, default None
Include only float, int, boolean data. If None, will attempt to use
Expand Down
32 changes: 17 additions & 15 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@
# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = dict()
_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame',
axes_single_arg='int or labels for object',
args_transpose='axes to permute (int or label for'
' object)')
_shared_doc_kwargs = dict(
axes='keywords for axes', klass='NDFrame',
axes_single_arg='int or labels for object',
args_transpose='axes to permute (int or label for object)',
optional_by="""
by : str or list of str
Name or list of names which refer to the axis items.""")


def is_dictlike(x):
Expand Down Expand Up @@ -1961,21 +1964,20 @@ def add_suffix(self, suffix):
.. versionadded:: 0.17.0
Parameters
----------
by : string name or list of names which refer to the axis items
axis : %(axes)s to direct sorting
ascending : bool or list of bool
----------%(optional_by)s
axis : %(axes)s to direct sorting, default 0
ascending : bool or list of bool, default True
Sort ascending vs. descending. Specify list for multiple sort
orders. If this is a list of bools, must match the length of
the by.
inplace : bool
inplace : bool, default False
if True, perform operation in-place
kind : {`quicksort`, `mergesort`, `heapsort`}
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also numpy.sort for more
information. `mergesort` is the only stable algorithm. For
DataFrames, this option is only applied when sorting on a single
column or label.
na_position : {'first', 'last'}
na_position : {'first', 'last'}, default 'last'
`first` puts NaNs at the beginning, `last` puts NaNs at the end
Returns
Expand All @@ -1997,16 +1999,16 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
if not None, sort on values in specified index level(s)
ascending : boolean, default True
Sort ascending vs. descending
inplace : bool
inplace : bool, default False
if True, perform operation in-place
kind : {`quicksort`, `mergesort`, `heapsort`}
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also numpy.sort for more
information. `mergesort` is the only stable algorithm. For
DataFrames, this option is only applied when sorting on a single
column or label.
na_position : {'first', 'last'}
na_position : {'first', 'last'}, default 'last'
`first` puts NaNs at the beginning, `last` puts NaNs at the end
sort_remaining : bool
sort_remaining : bool, default True
if true and sorting by level and index is multilevel, sort by other
levels too (in order) after sorting by specified level
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
axes='index', klass='Series', axes_single_arg="{0, 'index'}",
inplace="""inplace : boolean, default False
If True, performs operation inplace and returns None.""",
duplicated='Series')
duplicated='Series',
optional_by='')


def _coerce_method(converter):
Expand Down
116 changes: 55 additions & 61 deletions pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,75 +21,68 @@ class TestDataFrameSorting(tm.TestCase, TestData):

_multiprocess_can_split_ = True

def test_sort_values(self):
# API for 9816
def test_sort_index(self):
# GH13496

# sort_index
frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

# 9816 deprecated
with tm.assert_produces_warning(FutureWarning):
frame.sort(columns='A')
with tm.assert_produces_warning(FutureWarning):
frame.sort()

# axis=0 : sort rows by index labels
unordered = frame.ix[[3, 2, 4, 1]]
expected = unordered.sort_index()

result = unordered.sort_index(axis=0)
expected = frame
assert_frame_equal(result, expected)

unordered = frame.ix[:, [2, 1, 3, 0]]
expected = unordered.sort_index(axis=1)
result = unordered.sort_index(ascending=False)
expected = frame[::-1]
assert_frame_equal(result, expected)

# axis=1 : sort columns by column names
unordered = frame.ix[:, [2, 1, 3, 0]]
result = unordered.sort_index(axis=1)
assert_frame_equal(result, expected)
assert_frame_equal(result, frame)

result = unordered.sort_index(axis=1, ascending=False)
expected = frame.ix[:, ::-1]
assert_frame_equal(result, expected)

# sortlevel
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
def test_sort_index_multiindex(self):
# GH13496

# sort rows by specified level of multi-index
mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC'))
df = DataFrame([[1, 2], [3, 4]], mi)

result = df.sort_index(level='A', sort_remaining=False)
expected = df.sortlevel('A', sort_remaining=False)
assert_frame_equal(result, expected)

# sort columns by specified level of multi-index
df = df.T
result = df.sort_index(level='A', axis=1, sort_remaining=False)
expected = df.sortlevel('A', axis=1, sort_remaining=False)
assert_frame_equal(result, expected)

# MI sort, but no by
# MI sort, but no level: sort_remaining has no effect
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
df = DataFrame([[1, 2], [3, 4]], mi)
result = df.sort_index(sort_remaining=False)
expected = df.sort_index()
assert_frame_equal(result, expected)

def test_sort_index(self):
def test_sort(self):
frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

# axis=0
unordered = frame.ix[[3, 2, 4, 1]]
sorted_df = unordered.sort_index(axis=0)
expected = frame
assert_frame_equal(sorted_df, expected)

sorted_df = unordered.sort_index(ascending=False)
expected = frame[::-1]
assert_frame_equal(sorted_df, expected)

# axis=1
unordered = frame.ix[:, ['D', 'B', 'C', 'A']]
sorted_df = unordered.sort_index(axis=1)
expected = frame
assert_frame_equal(sorted_df, expected)
# 9816 deprecated
with tm.assert_produces_warning(FutureWarning):
frame.sort(columns='A')
with tm.assert_produces_warning(FutureWarning):
frame.sort()

sorted_df = unordered.sort_index(axis=1, ascending=False)
expected = frame.ix[:, ::-1]
assert_frame_equal(sorted_df, expected)
def test_sort_values(self):
frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]],
index=[1, 2, 3], columns=list('ABC'))

# by column
sorted_df = frame.sort_values(by='A')
Expand All @@ -109,16 +102,17 @@ def test_sort_index(self):
sorted_df = frame.sort_values(by=['A'], ascending=[False])
assert_frame_equal(sorted_df, expected)

# check for now
sorted_df = frame.sort_values(by='A')
assert_frame_equal(sorted_df, expected[::-1])
expected = frame.sort_values(by='A')
# multiple bys
sorted_df = frame.sort_values(by=['B', 'C'])
expected = frame.loc[[2, 1, 3]]
assert_frame_equal(sorted_df, expected)

expected = frame.sort_values(by=['A', 'B'], ascending=False)
sorted_df = frame.sort_values(by=['A', 'B'])
sorted_df = frame.sort_values(by=['B', 'C'], ascending=False)
assert_frame_equal(sorted_df, expected[::-1])

sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False])
assert_frame_equal(sorted_df, expected)

self.assertRaises(ValueError, lambda: frame.sort_values(
by=['A', 'B'], axis=2, inplace=True))

Expand All @@ -130,6 +124,25 @@ def test_sort_index(self):
with assertRaisesRegexp(ValueError, msg):
frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5)

def test_sort_values_inplace(self):
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

sorted_df = frame.copy()
sorted_df.sort_values(by='A', inplace=True)
expected = frame.sort_values(by='A')
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by='A', ascending=False, inplace=True)
expected = frame.sort_values(by='A', ascending=False)
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True)
expected = frame.sort_values(by=['A', 'B'], ascending=False)
assert_frame_equal(sorted_df, expected)

def test_sort_index_categorical_index(self):

df = (DataFrame({'A': np.arange(6, dtype='int64'),
Expand Down Expand Up @@ -361,25 +374,6 @@ def test_sort_index_different_sortorder(self):
result = idf['C'].sort_index(ascending=[1, 0])
assert_series_equal(result, expected['C'])

def test_sort_inplace(self):
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

sorted_df = frame.copy()
sorted_df.sort_values(by='A', inplace=True)
expected = frame.sort_values(by='A')
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by='A', ascending=False, inplace=True)
expected = frame.sort_values(by='A', ascending=False)
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True)
expected = frame.sort_values(by=['A', 'B'], ascending=False)
assert_frame_equal(sorted_df, expected)

def test_sort_index_duplicates(self):

# with 9816, these are all translated to .sort_values
Expand Down
Loading

0 comments on commit 65849d3

Please sign in to comment.