Skip to content

Commit

Permalink
Backport PR pandas-dev#25592: BUG: fix usage of na_sentinel with sort=True in factorize()
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored and MeeseeksDev[bot] committed Mar 12, 2019
1 parent c53c9d1 commit 5ce08c2
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Fixed Regressions
- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`).
- Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
- Fixed regression in :func:`factorize` when passing a custom ``na_sentinel`` value with ``sort=True`` (:issue:`25409`).
- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compression (:issue:`25311`)

.. _whatsnew_0242.bug_fixes:
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,13 +614,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):

if sort and len(uniques) > 0:
from pandas.core.sorting import safe_sort
try:
order = uniques.argsort()
order2 = order.argsort()
labels = take_1d(order2, labels, fill_value=na_sentinel)
uniques = uniques.take(order)
except TypeError:
# Mixed types, where uniques.argsort fails.
if na_sentinel == -1:
# GH-25409 take_1d only works for na_sentinels of -1
try:
order = uniques.argsort()
order2 = order.argsort()
labels = take_1d(order2, labels, fill_value=na_sentinel)
uniques = uniques.take(order)
except TypeError:
# Mixed types, where uniques.argsort fails.
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
else:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,21 @@ def test_parametrized_factorize_na_value(self, data, na_value):
tm.assert_numpy_array_equal(l, expected_labels)
tm.assert_numpy_array_equal(u, expected_uniques)

@pytest.mark.parametrize('sort', [True, False])
@pytest.mark.parametrize('na_sentinel', [-1, -10, 100])
def test_factorize_na_sentinel(self, sort, na_sentinel):
    """Factorize an object array containing ``None`` with a custom sentinel.

    For every combination of ``sort`` and ``na_sentinel`` the missing
    entry must be labelled with ``na_sentinel`` itself, and the remaining
    labels must index into the returned uniques (sorted or in order of
    first appearance, depending on ``sort``).
    """
    values = np.array(['b', 'a', None, 'b'], dtype=object)
    codes, cats = algos.factorize(values, sort=sort,
                                  na_sentinel=na_sentinel)

    # Sorted output orders the uniques as ['a', 'b']; unsorted output keeps
    # first-appearance order ['b', 'a'], which flips the 0/1 labels.
    want_codes, want_cats = (
        ([1, 0, na_sentinel, 1], ['a', 'b']) if sort
        else ([0, 1, na_sentinel, 0], ['b', 'a'])
    )

    tm.assert_numpy_array_equal(codes, np.array(want_codes, dtype=np.intp))
    tm.assert_numpy_array_equal(cats, np.array(want_cats, dtype=object))


class TestUnique(object):

Expand Down

0 comments on commit 5ce08c2

Please sign in to comment.