pandas-dev · jorisvandenbossche · Mar 12, 2019 · Mar 7, 2019 · Mar 11, 2019 · Mar 11, 2019
diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst
@@ -32,6 +32,7 @@ Fixed Regressions
 - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
 - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
 - Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
+- Fixed regression in :func:`factorize` when passing a custom ``na_sentinel`` value with ``sort=True`` (:issue:`25409`).
 - Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`)
 
 .. _whatsnew_0242.enhancements:

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -619,13 +619,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
 
     if sort and len(uniques) > 0:
         from pandas.core.sorting import safe_sort
-        try:
-            order = uniques.argsort()
-            order2 = order.argsort()
-            labels = take_1d(order2, labels, fill_value=na_sentinel)
-            uniques = uniques.take(order)
-        except TypeError:
-            # Mixed types, where uniques.argsort fails.
+        if na_sentinel == -1:
+            # GH-25409 take_1d only works for na_sentinels of -1
+            try:
+                order = uniques.argsort()
+                order2 = order.argsort()
+                labels = take_1d(order2, labels, fill_value=na_sentinel)
+                uniques = uniques.take(order)
+            except TypeError:
+                # Mixed types, where uniques.argsort fails.
+                uniques, labels = safe_sort(uniques, labels,
+                                            na_sentinel=na_sentinel,
+                                            assume_unique=True)
+        else:
             uniques, labels = safe_sort(uniques, labels,
                                         na_sentinel=na_sentinel,
                                         assume_unique=True)

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -326,6 +326,21 @@ def test_parametrized_factorize_na_value(self, data, na_value):
         tm.assert_numpy_array_equal(l, expected_labels)
         tm.assert_numpy_array_equal(u, expected_uniques)
 
+    @pytest.mark.parametrize('sort', [True, False])
+    @pytest.mark.parametrize('na_sentinel', [-1, -10, 100])
+    def test_factorize_na_sentinel(self, sort, na_sentinel):
+        data = np.array(['b', 'a', None, 'b'], dtype=object)
+        labels, uniques = algos.factorize(data, sort=sort,
+                                          na_sentinel=na_sentinel)
+        if sort:
+            expected_labels = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
+            expected_uniques = np.array(['a', 'b'], dtype=object)
+        else:
+            expected_labels = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
+            expected_uniques = np.array(['b', 'a'], dtype=object)
+        tm.assert_numpy_array_equal(labels, expected_labels)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
+
 
 class TestUnique(object):