From 3a7d82d8466903bf92c0669dfb635ddb5f9944e0 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 9 Aug 2024 10:18:41 -0700
Subject: [PATCH] REF (string dtype): de-duplicate _str_map (2) (#59451)

* REF (string): de-duplicate _str_map (2)

* mypy fixup
---
 pandas/core/arrays/string_.py      | 179 +++++++++++++++--------------
 pandas/core/arrays/string_arrow.py |  56 +--------
 2 files changed, 92 insertions(+), 143 deletions(-)

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 8a4fd9fc1b34d..94d49c4fdf6e6 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -346,6 +346,57 @@ def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
             raise ValueError
         return cls._from_sequence(scalars, dtype=dtype)
 
+    def _str_map(
+        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
+    ):
+        if self.dtype.na_value is np.nan:
+            return self._str_map_nan_semantics(
+                f, na_value=na_value, dtype=dtype, convert=convert
+            )
+
+        from pandas.arrays import BooleanArray
+
+        if dtype is None:
+            dtype = self.dtype
+        if na_value is None:
+            na_value = self.dtype.na_value
+
+        mask = isna(self)
+        arr = np.asarray(self)
+
+        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
+            constructor: type[IntegerArray | BooleanArray]
+            if is_integer_dtype(dtype):
+                constructor = IntegerArray
+            else:
+                constructor = BooleanArray
+
+            na_value_is_na = isna(na_value)
+            if na_value_is_na:
+                na_value = 1
+            elif dtype == np.dtype("bool"):
+                # GH#55736
+                na_value = bool(na_value)
+            result = lib.map_infer_mask(
+                arr,
+                f,
+                mask.view("uint8"),
+                convert=False,
+                na_value=na_value,
+                # error: Argument 1 to "dtype" has incompatible type
+                # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
+                # "Type[object]"
+                dtype=np.dtype(cast(type, dtype)),
+            )
+
+            if not na_value_is_na:
+                mask[:] = False
+
+            return constructor(result, mask)
+
+        else:
+            return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
+
     def _str_map_str_or_object(
         self,
         dtype,
@@ -377,6 +428,45 @@ def _str_map_str_or_object(
             # -> We don't know the result type. E.g. `.get` can return anything.
             return lib.map_infer_mask(arr, f, mask.view("uint8"))
 
+    def _str_map_nan_semantics(
+        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
+    ):
+        if dtype is None:
+            dtype = self.dtype
+        if na_value is None:
+            na_value = self.dtype.na_value
+
+        mask = isna(self)
+        arr = np.asarray(self)
+        convert = convert and not np.all(mask)
+
+        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
+            na_value_is_na = isna(na_value)
+            if na_value_is_na:
+                if is_integer_dtype(dtype):
+                    na_value = 0
+                else:
+                    na_value = True
+
+            result = lib.map_infer_mask(
+                arr,
+                f,
+                mask.view("uint8"),
+                convert=False,
+                na_value=na_value,
+                dtype=np.dtype(cast(type, dtype)),
+            )
+            if na_value_is_na and mask.any():
+                if is_integer_dtype(dtype):
+                    result = result.astype("float64")
+                else:
+                    result = result.astype("object")
+                result[mask] = np.nan
+            return result
+
+        else:
+            return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
+
 
 # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
 # incompatible with definition in base class "ExtensionArray"
@@ -742,95 +832,6 @@ def _cmp_method(self, other, op):
     # base class "NumpyExtensionArray" defined the type as "float")
     _str_na_value = libmissing.NA  # type: ignore[assignment]
 
-    def _str_map_nan_semantics(
-        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
-    ):
-        if dtype is None:
-            dtype = self.dtype
-        if na_value is None:
-            na_value = self.dtype.na_value
-
-        mask = isna(self)
-        arr = np.asarray(self)
-        convert = convert and not np.all(mask)
-
-        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
-            na_value_is_na = isna(na_value)
-            if na_value_is_na:
-                if is_integer_dtype(dtype):
-                    na_value = 0
-                else:
-                    na_value = True
-
-            result = lib.map_infer_mask(
-                arr,
-                f,
-                mask.view("uint8"),
-                convert=False,
-                na_value=na_value,
-                dtype=np.dtype(cast(type, dtype)),
-            )
-            if na_value_is_na and mask.any():
-                if is_integer_dtype(dtype):
-                    result = result.astype("float64")
-                else:
-                    result = result.astype("object")
-                result[mask] = np.nan
-            return result
-
-        else:
-            return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
-
-    def _str_map(
-        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
-    ):
-        if self.dtype.na_value is np.nan:
-            return self._str_map_nan_semantics(
-                f, na_value=na_value, dtype=dtype, convert=convert
-            )
-
-        from pandas.arrays import BooleanArray
-
-        if dtype is None:
-            dtype = StringDtype(storage="python")
-        if na_value is None:
-            na_value = self.dtype.na_value
-
-        mask = isna(self)
-        arr = np.asarray(self)
-
-        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
-            constructor: type[IntegerArray | BooleanArray]
-            if is_integer_dtype(dtype):
-                constructor = IntegerArray
-            else:
-                constructor = BooleanArray
-
-            na_value_is_na = isna(na_value)
-            if na_value_is_na:
-                na_value = 1
-            elif dtype == np.dtype("bool"):
-                na_value = bool(na_value)
-            result = lib.map_infer_mask(
-                arr,
-                f,
-                mask.view("uint8"),
-                convert=False,
-                na_value=na_value,
-                # error: Argument 1 to "dtype" has incompatible type
-                # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
-                # "Type[object]"
-                dtype=np.dtype(cast(type, dtype)),
-            )
-
-            if not na_value_is_na:
-                mask[:] = False
-
-            return constructor(result, mask)
-
-        else:
-            return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
-
 
 class StringArrayNumpySemantics(StringArray):
     _storage = "python"
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 4893883d3ad12..fdb0259230c7f 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -279,6 +279,8 @@ def astype(self, dtype, copy: bool = True):
     # base class "ObjectStringArrayMixin" defined the type as "float")
     _str_na_value = libmissing.NA  # type: ignore[assignment]
 
+    _str_map = BaseStringArray._str_map
+
     def _str_map_nan_semantics(
         self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
     ):
@@ -318,60 +320,6 @@ def _str_map_nan_semantics(
         else:
             return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
 
-    def _str_map(
-        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
-    ):
-        if self.dtype.na_value is np.nan:
-            return self._str_map_nan_semantics(
-                f, na_value=na_value, dtype=dtype, convert=convert
-            )
-
-        # TODO: de-duplicate with StringArray method. This method is moreless copy and
-        # paste.
-
-        from pandas.arrays import (
-            BooleanArray,
-            IntegerArray,
-        )
-
-        if dtype is None:
-            dtype = self.dtype
-        if na_value is None:
-            na_value = self.dtype.na_value
-
-        mask = isna(self)
-        arr = np.asarray(self)
-
-        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
-            constructor: type[IntegerArray | BooleanArray]
-            if is_integer_dtype(dtype):
-                constructor = IntegerArray
-            else:
-                constructor = BooleanArray
-
-            na_value_is_na = isna(na_value)
-            if na_value_is_na:
-                na_value = 1
-            result = lib.map_infer_mask(
-                arr,
-                f,
-                mask.view("uint8"),
-                convert=False,
-                na_value=na_value,
-                # error: Argument 1 to "dtype" has incompatible type
-                # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
-                # "Type[object]"
-                dtype=np.dtype(cast(type, dtype)),
-            )
-
-            if not na_value_is_na:
-                mask[:] = False
-
-            return constructor(result, mask)
-
-        else:
-            return self._str_map_str_or_object(dtype, na_value, arr, f, mask, convert)
-
     def _str_contains(
         self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
     ):