Skip to content

Commit

Permalink
BUG: fixed formatting and small linting issues
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobaustin123 committed Nov 20, 2019
1 parent 619ca63 commit 47f9751
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 73 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ Other enhancements
- Roundtripping DataFrames with nullable integer or string data types to parquet
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
- :meth:`DataFrame.sort_values`, :meth:`DataFrame.sort_index`, :meth:`Series.sort_values`, and :meth:`Series.sort_index`
now support the ``key`` argument which allows for custom sorting orders (:issue:`3942`).

Build Changes
^^^^^^^^^^^^^
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1603,7 +1603,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
"""
return super().argsort(ascending=ascending, kind=kind, *args, **kwargs)

def sort_values(self, inplace=False, ascending=True, na_position="last"):
def sort_values(self, inplace=False, ascending=True, na_position="last", key=None):
"""
Sort the Categorical by category value returning a new
Categorical by default.
Expand Down Expand Up @@ -1682,7 +1682,9 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
msg = "invalid na_position: {na_position!r}"
raise ValueError(msg.format(na_position=na_position))

sorted_idx = nargsort(self, ascending=ascending, na_position=na_position)
sorted_idx = nargsort(
self, ascending=ascending, na_position=na_position, key=key
)

if inplace:
self._codes = self._codes[sorted_idx]
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import sys
from textwrap import dedent
from typing import (
Callable,
FrozenSet,
Hashable,
Iterable,
Expand All @@ -25,7 +26,6 @@
Tuple,
Type,
Union,
Callable
)
import warnings

Expand Down Expand Up @@ -4715,15 +4715,15 @@ def f(vals):

@Substitution(**_shared_doc_kwargs)
@Appender(NDFrame.sort_values.__doc__)
def sort_values(
def sort_values( # type: ignore
self,
by,
axis=0,
ascending=True,
inplace=False,
kind="quicksort",
na_position="last",
key : Union[Callable, None] = None
key: Optional[Callable] = None,
):
inplace = validate_bool_kwarg(inplace, "inplace")
axis = self._get_axis_number(axis)
Expand All @@ -4738,7 +4738,9 @@ def sort_values(
from pandas.core.sorting import lexsort_indexer

keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position, key=key)
indexer = lexsort_indexer(
keys, orders=ascending, na_position=na_position, key=key
)
indexer = ensure_platform_int(indexer)
else:
from pandas.core.sorting import nargsort
Expand All @@ -4764,7 +4766,7 @@ def sort_values(

@Substitution(**_shared_doc_kwargs)
@Appender(NDFrame.sort_index.__doc__)
def sort_index(
def sort_index( # type: ignore
self,
axis=0,
level=None,
Expand All @@ -4774,7 +4776,7 @@ def sort_index(
na_position="last",
sort_remaining=True,
by=None,
key : Union[Callable, None] = None
key: Optional[Callable] = None,
):

# TODO: this can be combined with Series.sort_index impl as
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from datetime import datetime
import operator
from textwrap import dedent

from typing import FrozenSet, Union, Callable
from typing import Callable, FrozenSet, Optional, Union
import warnings

import numpy as np
Expand Down Expand Up @@ -4425,7 +4424,9 @@ def asof_locs(self, where, mask):

return result

def sort_values(self, return_indexer=False, ascending=True, key : Callable = None):
def sort_values(
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
):
"""
Return a sorted copy of the index.
Expand All @@ -4439,7 +4440,7 @@ def sort_values(self, return_indexer=False, ascending=True, key : Callable = Non
ascending : bool, default True
Should the index values be sorted in an ascending order.
key : Callable, default None
Apply a key function to the indices before sorting, like
Apply a key function to the indices before sorting, like
built-in sorted function.
Returns
Expand Down
19 changes: 13 additions & 6 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Base and utility classes for tseries type pandas objects.
"""
import operator
from typing import Set
from typing import Callable, Optional, Set
import warnings

import numpy as np
Expand Down Expand Up @@ -279,19 +279,26 @@ def map(self, mapper, na_action=None):
except Exception:
return self.astype(object).map(mapper)

def sort_values(self, return_indexer=False, ascending=True):
def sort_values(
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
):
"""
Return sorted copy of Index.
"""
if key:
idx = self.map(key, na_action="ignore")
else:
idx = self

if return_indexer:
_as = self.argsort()
_as = idx.argsort()
if not ascending:
_as = _as[::-1]
sorted_index = self.take(_as)
return sorted_index, _as
else:
sorted_values = np.sort(self._ndarray_values)
attribs = self._get_attributes_dict()
sorted_values = np.sort(idx._ndarray_values)
attribs = self._get_attributes_dict() # type: ignore
freq = attribs["freq"]

if freq is not None and not is_period_dtype(self):
Expand All @@ -304,7 +311,7 @@ def sort_values(self, return_indexer=False, ascending=True):
if not ascending:
sorted_values = sorted_values[::-1]

return self._simple_new(sorted_values, **attribs)
return self._simple_new(sorted_values, **attribs) # type: ignore

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2883,14 +2883,14 @@ def update(self, other):
# ----------------------------------------------------------------------
# Reindexing, sorting

def sort_values(
def sort_values( # type: ignore
self,
axis=0,
ascending=True,
inplace=False,
kind="quicksort",
na_position="last",
key: Callable = None
key: Optional[Callable] = None,
):
"""
Sort by the values.
Expand Down Expand Up @@ -3081,7 +3081,7 @@ def _try_kind_sort(arr):
else:
return result.__finalize__(self)

def sort_index(
def sort_index( # type: ignore
self,
axis=0,
level=None,
Expand All @@ -3090,7 +3090,7 @@ def sort_index(
kind="quicksort",
na_position="last",
sort_remaining=True,
key : Callable = None
key: Optional[Callable] = None,
):
"""
Sort Series by index labels.
Expand Down
19 changes: 14 additions & 5 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" miscellaneous sorting / groupby utilities """
from typing import Callable, Union
from typing import Callable, Optional

import numpy as np

Expand Down Expand Up @@ -189,7 +189,8 @@ def indexer_from_factorized(labels, shape, compress: bool = True):
return get_group_index_sorter(ids, ngroups)


def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable, None] = None):
def lexsort_indexer(keys, orders=None, na_position="last", key=None):

from pandas.core.arrays import Categorical

labels = []
Expand Down Expand Up @@ -239,7 +240,13 @@ def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable,
return indexer_from_factorized(labels, shape)


def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: Union[Callable, None] = None):
def nargsort(
items,
kind="quicksort",
ascending: bool = True,
na_position="last",
key: Optional[Callable] = None,
):
"""
This is intended to be a drop-in replacement for np.argsort which
handles NaNs. It adds ascending and na_position parameters.
Expand All @@ -260,9 +267,11 @@ def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: U
if masked.size == 0:
vals = np.array([]) # vectorize fails on empty object arrays
else:
vals = np.asarray(key_func(masked)) # revert from masked
vals = np.asarray(key_func(masked)) # revert from masked

return nargsort(vals, kind=kind, ascending=ascending, na_position=na_position, key=None)
return nargsort(
vals, kind=kind, ascending=ascending, na_position=na_position, key=None
)

idx = np.arange(len(items))
non_nans = items[~mask]
Expand Down
57 changes: 31 additions & 26 deletions pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_sort_values(self):
with pytest.raises(ValueError, match=msg):
frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5)

@pytest.fixture(params=[None, lambda x : x])
@pytest.fixture(params=[None, lambda x: x])
def key(self, request):
return request.param

Expand All @@ -93,22 +93,22 @@ def test_sort_values_inplace(self, key):
sorted_df = frame.copy()
sorted_df.sort_values(by="A", inplace=True, key=key)
expected = frame.sort_values(by="A", key=key)
assert_frame_equal(sorted_df, expected)
tm.assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by=1, axis=1, inplace=True, key=key)
expected = frame.sort_values(by=1, axis=1, key=key)
assert_frame_equal(sorted_df, expected)
tm.assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by="A", ascending=False, inplace=True, key=key)
expected = frame.sort_values(by="A", ascending=False, key=key)
assert_frame_equal(sorted_df, expected)
tm.assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by=["A", "B"], ascending=False, inplace=True, key=key)
expected = frame.sort_values(by=["A", "B"], ascending=False, key=key)
assert_frame_equal(sorted_df, expected)
tm.assert_frame_equal(sorted_df, expected)

def test_sort_nan(self):
# GH3917
Expand Down Expand Up @@ -256,15 +256,20 @@ def test_sort_multi_index_key(self):
df = DataFrame(
{"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
)
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[0])
result = df.set_index(list("abc")).sort_index(
level=list("ba"), key=lambda x: x[0]
)

expected = DataFrame(
{"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
)
expected = expected.set_index(list("abc"))
tm.assert_frame_equal(result, expected)

result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[2])
result = df.set_index(list("abc")).sort_index(
level=list("ba"), key=lambda x: x[2]
)

expected = df.set_index(list("abc"))
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -594,63 +599,63 @@ def test_sort_index_key(self):

result = df.sort_index()
expected = df.iloc[[2, 3, 0, 1, 5, 4]]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

result = df.sort_index(key=str.lower)
expected = df.iloc[[0, 1, 5, 2, 3, 4]]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

result = df.sort_index(key=str.lower, ascending=False)
expected = df.iloc[[4, 2, 3, 0, 1, 5]]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
@pytest.mark.parametrize("dtype", ["int8", "int64", "float64"])
def test_sort_index_key_int(self, dtype):
df = DataFrame(np.arange(6, dtype=dtype), index=np.arange(6, dtype=dtype))

result = df.sort_index()
assert_frame_equal(result, df)
tm.assert_frame_equal(result, df)

result = df.sort_index(key=lambda x : -x)
result = df.sort_index(key=lambda x: -x)
expected = df.sort_index(ascending=False)
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

result = df.sort_index(key=lambda x : 2 * x)
assert_frame_equal(result, df)
result = df.sort_index(key=lambda x: 2 * x)
tm.assert_frame_equal(result, df)

def test_sort_value_key(self):
df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan]))

result = df.sort_values(0)
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

result = df.sort_values(0, key=lambda x : x + 5)
result = df.sort_values(0, key=lambda x: x + 5)
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

result = df.sort_values(0, key=lambda x : -x, ascending=False)
result = df.sort_values(0, key=lambda x: -x, ascending=False)
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_sort_value_key_nan(self):
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))

result = df.sort_values(1)
expected = df[::-1]
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

result = df.sort_values([0, 1], key=str.lower)
assert_frame_equal(result, df)
tm.assert_frame_equal(result, df)

result = df.sort_values([0, 1], key=str.lower, ascending=False)
expected = df.sort_values(1, key=str.lower, ascending=False)
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize('key', [None, lambda x : x])
@pytest.mark.parametrize("key", [None, lambda x: x])
def test_sort_value_key_empty(self, key):
df = DataFrame(np.array([]))

df.sort_values(0, key=key)
df.sort_index(key=key)

Expand Down
Loading

0 comments on commit 47f9751

Please sign in to comment.