From d17780b9b63e7659b4930d54753095c9d5429399 Mon Sep 17 00:00:00 2001
From: Max Bolingbroke <batterseapower@hotmail.com>
Date: Tue, 19 Feb 2019 06:20:45 +0000
Subject: [PATCH] BUG: Fix potential segfault after
 pd.Categorical(pd.Series(...), categories=...)

---
 doc/source/whatsnew/v0.24.2.rst                      |  2 ++
 pandas/core/arrays/categorical.py                    | 11 +++--------
 pandas/tests/arrays/categorical/test_constructors.py | 11 +++++++++++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst
index a7e522d27f8e26..4cd5c49eec0089 100644
--- a/doc/source/whatsnew/v0.24.2.rst
+++ b/doc/source/whatsnew/v0.24.2.rst
@@ -30,6 +30,8 @@ Fixed Regressions
 - Fixed regression in :class:`TimedeltaIndex` where `np.sum(index)` incorrectly returned a zero-dimensional object instead of a scalar (:issue:`25282`)
 - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`)
 
+- Fixed regression in ``Categorical``, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
+
 .. _whatsnew_0242.enhancements:
 
 Enhancements
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 79e565df94eaea..d8274632ef3a8d 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -323,14 +323,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
         # we may have dtype.categories be None, and we need to
         # infer categories in a factorization step futher below
 
-        if is_categorical(values):
-            # GH23814, for perf, if values._values already an instance of
-            # Categorical, set values to codes, and run fastpath
-            if (isinstance(values, (ABCSeries, ABCIndexClass)) and
-               isinstance(values._values, type(self))):
-                values = values._values.codes.copy()
-                fastpath = True
-
         if fastpath:
             self._codes = coerce_indexer_dtype(values, dtype.categories)
             self._dtype = self._dtype.update_dtype(dtype)
@@ -2625,6 +2617,9 @@ def _recode_for_categories(codes, old_categories, new_categories):
     if len(old_categories) == 0:
         # All null anyway, so just retain the nulls
         return codes.copy()
+    elif new_categories.equals(old_categories):
+        # Same categories, so no need to actually recode
+        return codes.copy()
     indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories),
                                    new_categories)
     new_codes = take_1d(indexer, codes.copy(), fill_value=-1)
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index 25c299692ceca3..1738ca44c12225 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -149,6 +149,17 @@ def test_constructor(self):
                          categories=["a", "b", "c", "d"])
         tm.assert_categorical_equal(c1, c2)
 
+        # GH25318: constructing with pd.Series used to bogusly skip recoding
+        # categories
+        c0 = Categorical(["a", "b", "c", "a"])
+        c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"])
+
+        c2 = Categorical(c0, categories=c1.categories)
+        tm.assert_categorical_equal(c1, c2)
+
+        c3 = Categorical(Series(c0), categories=c1.categories)
+        tm.assert_categorical_equal(c1, c3)
+
         # This should result in integer categories, not float!
         cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
         assert is_integer_dtype(cat.categories)