# Patch summary (free text ahead of the first "diff --git" header).
#
# pandas/core/internals/blocks.py, replace_list:
#   In the `using_cow and inplace` branch the unconditional `self.copy()`
#   is replaced by `self.copy(deep=self.refs.has_reference())`, so the
#   `deep` flag now follows whether the block's refs report a reference.
#   NOTE(review): presumably this avoids a deep copy when nothing else
#   references the block's data -- confirm against `Block.copy`.
#
# pandas/tests/copy_view/test_replace.py:
#   Adds two tests for list-replace on a categorical column:
#   - test_replace_list_categorical: inplace replace keeps sharing the codes
#     with the array fetched beforehand; a non-inplace replace does not.
#   - test_replace_list_inplace_refs_categorical: inplace replace while a
#     `df[:]` object exists; the codes are not shared with it afterwards.
#
# NOTE(review): leading indentation below was reconstructed from Python
# syntax and the hunk line counts; verify the context lines against the
# target files before applying.
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 32b159983308a..f9a14a4a9e877 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -701,8 +701,7 @@ def replace_list(
             # TODO: avoid special-casing
             # GH49404
             if using_cow and inplace:
-                # TODO(CoW): Optimize
-                blk = self.copy()
+                blk = self.copy(deep=self.refs.has_reference())
             else:
                 blk = self if inplace else self.copy()
             values = cast(Categorical, blk.values)
diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py
index 13991ac8b81cb..c29e19a6e832f 100644
--- a/pandas/tests/copy_view/test_replace.py
+++ b/pandas/tests/copy_view/test_replace.py
@@ -112,6 +112,38 @@ def test_replace_to_replace_wrong_dtype(using_copy_on_write):
     assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
 
 
+def test_replace_list_categorical(using_copy_on_write):
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
+    arr = get_array(df, "a")
+    df.replace(["c"], value="a", inplace=True)
+    assert np.shares_memory(arr.codes, get_array(df, "a").codes)
+    if using_copy_on_write:
+        assert df._mgr._has_no_reference(0)
+
+    df_orig = df.copy()
+    df2 = df.replace(["b"], value="a")
+    assert not np.shares_memory(arr.codes, get_array(df2, "a").codes)
+
+    tm.assert_frame_equal(df, df_orig)
+
+
+def test_replace_list_inplace_refs_categorical(using_copy_on_write):
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
+    view = df[:]
+    df_orig = df.copy()
+    df.replace(["c"], value="a", inplace=True)
+    if using_copy_on_write:
+        assert not np.shares_memory(
+            get_array(view, "a").codes, get_array(df, "a").codes
+        )
+        tm.assert_frame_equal(df_orig, view)
+    else:
+        # Without CoW this could be inplace; currently the codes are not shared
+        assert not np.shares_memory(
+            get_array(view, "a").codes, get_array(df, "a").codes
+        )
+
+
 @pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
 def test_replace_inplace(using_copy_on_write, to_replace):
     df = DataFrame({"a": [1.5, 2, 3]})