diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 0fc4afc95a2ce..24bed22b3a3fe 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -266,6 +266,7 @@ Interval
 Indexing
 ^^^^^^^^
 - Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
+- Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
 -
 
 Missing
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 1d5db123068e2..74e785be06356 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -68,6 +68,7 @@
     PeriodDtype,
 )
 from pandas.core.dtypes.generic import (
+    ABCExtensionArray,
     ABCIndex,
     ABCSeries,
 )
@@ -1772,6 +1773,23 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
                         return casted
                     raise LossySetitemError
 
+                elif isinstance(element, ABCExtensionArray) and isinstance(
+                    element.dtype, CategoricalDtype
+                ):
+                    # GH#52927 setting Categorical value into non-EA frame
+                    # TODO: general-case for EAs?
+                    try:
+                        casted = element.astype(dtype)
+                    except (ValueError, TypeError):
+                        raise LossySetitemError
+                    # Check for cases of either
+                    # a) lossy overflow/rounding or
+                    # b) semantic changes like dt64->int64
+                    comp = casted == element
+                    if not comp.all():
+                        raise LossySetitemError
+                    return casted
+
                 # Anything other than integer we cannot hold
                 raise LossySetitemError
             if (
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 66b01dfb59f7f..30f6507d02484 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -460,6 +460,12 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
         and will receive the same block
         """
         new_dtype = find_result_type(self.values.dtype, other)
+        if new_dtype == self.dtype:
+            # GH#52927 avoid RecursionError
+            raise AssertionError(
+                "Something has gone wrong, please report a bug at "
+                "https://github.com/pandas-dev/pandas/issues"
+            )
 
         # In a future version of pandas, the default will be that
         # setting `nan` into an integer series won't raise.
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index d0b6adfda0241..a2693c85e507f 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1657,6 +1657,14 @@ def test_loc_setitem_range_key(self, frame_or_series):
         expected = frame_or_series([0, 1, 10, 9, 11], index=obj.index)
         tm.assert_equal(obj, expected)
 
+    def test_loc_setitem_numpy_frame_categorical_value(self):
+        # GH#52927
+        df = DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
+        df.loc[1:2, "a"] = Categorical([2, 2], categories=[1, 2])
+
+        expected = DataFrame({"a": [1, 2, 2, 1, 1], "b": ["a", "a", "a", "a", "a"]})
+        tm.assert_frame_equal(df, expected)
+
 
 class TestLocWithEllipsis:
     @pytest.fixture(params=[tm.loc, tm.iloc])