From 631ab35ecd469c543ce740ad493dee44e167eabc Mon Sep 17 00:00:00 2001 From: Harsha Lakamsani Date: Sun, 25 Aug 2024 10:52:12 -0700 Subject: [PATCH 001/176] DOCS: fix docstring validation errors for pandas.Series (#59602) * DOCS: pandas.Series.prod + pandas.Series.product RT03 docstring validation error fixed * DOCS: pandas.Series.pop SA01 + pandas.Series.reorder_levels RT03/SA01 docstring validation error fixed * DOCS: pandas.Series.list.__getitem__ + pandas.Series.list.flatten + pandas.Series.list.len SA01 docstring validation error fixed * DOCS: pandas.Series.sparse.density SA01 docstring validation error fixed * DOCS: pandas.Series.gt + pandas.Series.lt + pandas.Series.ne SA01 docstring validation error fixed * linting issues leftover from docstring validation fixes resolved --- ci/code_checks.sh | 11 ----------- pandas/core/arrays/arrow/accessors.py | 14 ++++++++++++++ pandas/core/arrays/sparse/array.py | 5 +++++ pandas/core/generic.py | 2 ++ pandas/core/ops/docstrings.py | 6 +++--- pandas/core/series.py | 13 ++++++++++++- 6 files changed, 36 insertions(+), 15 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index e9f4ee1f391a2..0cb2df7bb334b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -133,20 +133,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.dt.tz_convert PR01,PR02" \ -i "pandas.Series.dt.tz_localize PR01,PR02" \ -i "pandas.Series.dt.unit GL08" \ - -i "pandas.Series.gt SA01" \ - -i "pandas.Series.list.__getitem__ SA01" \ - -i "pandas.Series.list.flatten SA01" \ - -i "pandas.Series.list.len SA01" \ - -i "pandas.Series.lt SA01" \ - -i "pandas.Series.ne SA01" \ -i "pandas.Series.pad PR01,SA01" \ - -i "pandas.Series.pop SA01" \ - -i "pandas.Series.prod RT03" \ - -i "pandas.Series.product RT03" \ - -i "pandas.Series.reorder_levels RT03,SA01" \ -i "pandas.Series.sem PR01,RT03,SA01" \ -i "pandas.Series.sparse PR01,SA01" \ - -i "pandas.Series.sparse.density SA01" \ -i "pandas.Series.sparse.fill_value SA01" 
\ -i "pandas.Series.sparse.from_coo PR07,SA01" \ -i "pandas.Series.sparse.npoints SA01" \ diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index d8f948a37d206..aea162461d3c1 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -92,6 +92,12 @@ def len(self) -> Series: pandas.Series The length of each list. + See Also + -------- + str.len : Python built-in function returning the length of an object. + Series.size : Returns the length of the Series. + StringMethods.len : Compute the length of each element in the Series/Index. + Examples -------- >>> import pyarrow as pa @@ -128,6 +134,10 @@ def __getitem__(self, key: int | slice) -> Series: pandas.Series The list at requested index. + See Also + -------- + ListAccessor.flatten : Flatten list values. + Examples -------- >>> import pyarrow as pa @@ -187,6 +197,10 @@ def flatten(self) -> Series: pandas.Series The data from all lists in the series flattened. + See Also + -------- + ListAccessor.__getitem__ : Index or slice values in the Series. + Examples -------- >>> import pyarrow as pa diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 3a08344369822..a09dc20af3b36 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -671,6 +671,11 @@ def density(self) -> float: """ The percent of non- ``fill_value`` points, as decimal. + See Also + -------- + DataFrame.sparse.from_spmatrix : Create a new DataFrame from a + scipy sparse matrix. 
+ Examples -------- >>> from pandas.arrays import SparseArray diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdc8642c9c70e..61fa5c49a8c5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11815,6 +11815,8 @@ def last_valid_index(self) -> Hashable: Returns ------- {name1} or scalar\ + + Value containing the calculation referenced in the description.\ {see_also}\ {examples} """ diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 0ad6db0aefe9c..5ce0a2da86f31 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -376,7 +376,7 @@ def make_flex_doc(op_name: str, typ: str) -> str: "ne": { "op": "!=", "desc": "Not equal to", - "reverse": None, + "reverse": "eq", "series_examples": _ne_example_SERIES, "series_returns": _returns_series, }, @@ -397,14 +397,14 @@ def make_flex_doc(op_name: str, typ: str) -> str: "gt": { "op": ">", "desc": "Greater than", - "reverse": None, + "reverse": "lt", "series_examples": _gt_example_SERIES, "series_returns": _returns_series, }, "ge": { "op": ">=", "desc": "Greater than or equal to", - "reverse": None, + "reverse": "le", "series_examples": _ge_example_SERIES, "series_returns": _returns_series, }, diff --git a/pandas/core/series.py b/pandas/core/series.py index 5c35c6c0d6d23..ed27984526fa5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4093,7 +4093,13 @@ def reorder_levels(self, order: Sequence[Level]) -> Series: Returns ------- - type of caller (new object) + Series + Type of caller with index as MultiIndex (new object). + + See Also + -------- + DataFrame.reorder_levels : Rearrange index or column levels using + input ``order``. Examples -------- @@ -5048,6 +5054,11 @@ def pop(self, item: Hashable) -> Any: scalar Value that is popped from series. + See Also + -------- + Series.drop: Drop specified values from Series. + Series.drop_duplicates: Return Series with duplicate values removed. 
+ Examples -------- >>> ser = pd.Series([1, 2, 3]) From dca2635d0ab246b45c5edf6575e2d5fc20751023 Mon Sep 17 00:00:00 2001 From: Florian Bourgey Date: Sun, 25 Aug 2024 13:53:53 -0400 Subject: [PATCH 002/176] Add example same correlation in pandas.Series.corr documentation (#59591) --- pandas/core/series.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index ed27984526fa5..d944d1ce819b6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2619,6 +2619,13 @@ def corr( >>> s2 = pd.Series([1, 2, 3], index=[2, 1, 0]) >>> s1.corr(s2) -1.0 + + If the input is a constant array, the correlation is not defined in this case, + and ``np.nan`` is returned. + + >>> s1 = pd.Series([0.45, 0.45]) + >>> s1.corr(s1) + nan """ # noqa: E501 this, other = self.align(other, join="inner") if len(this) == 0: From 2130a99d1f3ffaf871bea5c40f1aa5ef59659687 Mon Sep 17 00:00:00 2001 From: Ankit Dhokariya <67553771+ankit-dhokariya@users.noreply.github.com> Date: Sun, 25 Aug 2024 10:56:10 -0700 Subject: [PATCH 003/176] DOC: Enforce Numpy Docstring Validation (Issue #59458) (#59590) * adding docstring for pandas.Timestamp.day property * fixing type annotation --- ci/code_checks.sh | 1 - pandas/_libs/tslibs/timestamps.pyx | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 0cb2df7bb334b..bffac19b1b128 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -168,7 +168,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.TimedeltaIndex.nanoseconds SA01" \ -i "pandas.TimedeltaIndex.seconds SA01" \ -i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \ - -i "pandas.Timestamp.day GL08" \ -i "pandas.Timestamp.fold GL08" \ -i "pandas.Timestamp.hour GL08" \ -i "pandas.Timestamp.max PR02" \ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3268207b667f2..a9463ce8ad044 100644 --- 
a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -961,6 +961,29 @@ cdef class _Timestamp(ABCTimestamp): """ return ((self.month - 1) // 3) + 1 + @property + def day(self) -> int: + """ + Return the day of the Timestamp. + + Returns + ------- + int + The day of the Timestamp. + + See Also + -------- + Timestamp.week : Return the week number of the year. + Timestamp.weekday : Return the day of the week. + + Examples + -------- + >>> ts = pd.Timestamp("2024-08-31 16:16:30") + >>> ts.day + 31 + """ + return super().day + @property def week(self) -> int: """ From fe42b3b234f6b513da68f98b648d60d9f9e66e30 Mon Sep 17 00:00:00 2001 From: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com> Date: Sun, 25 Aug 2024 18:57:31 +0100 Subject: [PATCH 004/176] DOCS: fix docstring validation errors for pandas.Series (#59596) Fixes: -i "pandas.Series.str.match RT03" \ -i "pandas.Series.str.normalize RT03,SA01" \ -i "pandas.Series.str.repeat SA01" \ -i "pandas.Series.str.replace SA01" \ --- ci/code_checks.sh | 4 ---- pandas/core/strings/accessor.py | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index bffac19b1b128..25d68cdf41095 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -142,10 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.sparse.sp_values SA01" \ -i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \ -i "pandas.Series.std PR01,RT03,SA01" \ - -i "pandas.Series.str.match RT03" \ - -i "pandas.Series.str.normalize RT03,SA01" \ - -i "pandas.Series.str.repeat SA01" \ - -i "pandas.Series.str.replace SA01" \ -i "pandas.Series.str.wrap RT03,SA01" \ -i "pandas.Series.str.zfill RT03" \ -i "pandas.Series.struct.dtypes SA01" \ diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 1014c9559afaf..c88270b2a2f16 100644 --- a/pandas/core/strings/accessor.py +++ 
b/pandas/core/strings/accessor.py @@ -1379,6 +1379,9 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=None): Returns ------- Series/Index/array of boolean values + A Series, Index, or array of boolean values indicating whether the start + of each string matches the pattern. The result will be of the same type + as the input. See Also -------- @@ -1503,6 +1506,14 @@ def replace( * if `pat` is a compiled regex and `case` or `flags` is set * if `pat` is a dictionary and `repl` is not None. + See Also + -------- + Series.str.replace : Method to replace occurrences of a substring with another + substring. + Series.str.extract : Extract substrings using a regular expression. + Series.str.findall : Find all occurrences of a pattern or regex in each string. + Series.str.split : Split each string by a specified delimiter or pattern. + Notes ----- When `pat` is a compiled regex, all flags should be included in the @@ -1634,6 +1645,20 @@ def repeat(self, repeats): Series or Index of repeated string objects specified by input parameter repeats. + See Also + -------- + Series.str.lower : Convert all characters in each string to lowercase. + Series.str.upper : Convert all characters in each string to uppercase. + Series.str.title : Convert each string to title case (capitalizing the first + letter of each word). + Series.str.strip : Remove leading and trailing whitespace from each string. + Series.str.replace : Replace occurrences of a substring with another substring + in each string. + Series.str.ljust : Left-justify each string in the Series/Index by padding with + a specified character. + Series.str.rjust : Right-justify each string in the Series/Index by padding with + a specified character. + Examples -------- >>> s = pd.Series(["a", "b", "c"]) @@ -3091,6 +3116,19 @@ def normalize(self, form): Returns ------- Series/Index of objects + A Series or Index of strings in the same Unicode form specified by `form`. 
+ The returned object retains the same type as the input (Series or Index), + and contains the normalized strings. + + See Also + -------- + Series.str.upper : Convert all characters in each string to uppercase. + Series.str.lower : Convert all characters in each string to lowercase. + Series.str.title : Convert each string to title case (capitalizing the + first letter of each word). + Series.str.strip : Remove leading and trailing whitespace from each string. + Series.str.replace : Replace occurrences of a substring with another substring + in each string. Examples -------- From 90e8e04c2d305624d2d0b68087e65b6aace7ef1f Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sun, 25 Aug 2024 10:59:50 -0700 Subject: [PATCH 005/176] MAINT: update vendored version util from packaging (#59558) * MAINT: update vendored version util from packaging * fix docstring * fix docstring * skip docstring validation * ignore * fix validation ignore * remove docstring * rollback * add comments --- pandas/util/version/__init__.py | 238 +++++++------------------------- 1 file changed, 51 insertions(+), 187 deletions(-) diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 9838e371f0d00..b5d975a0db1d8 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -1,27 +1,22 @@ -# Vendored from https://github.com/pypa/packaging/blob/main/packaging/_structures.py -# and https://github.com/pypa/packaging/blob/main/packaging/_structures.py -# changeset ae891fd74d6dd4c6063bb04f2faeadaac6fc6313 -# 04/30/2021 +# Vendored from https://github.com/pypa/packaging/blob/main/src/packaging/_structures.py +# and https://github.com/pypa/packaging/blob/main/src/packaging/version.py +# changeset 24e5350b2ff3c5c7a36676c2af5f2cb39fd1baf8 # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. 
Licence at LICENSES/PACKAGING_LICENSE from __future__ import annotations -import collections -from collections.abc import ( - Callable, - Iterator, -) +from collections.abc import Callable import itertools import re from typing import ( + Any, + NamedTuple, SupportsInt, - Tuple, Union, ) -import warnings -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "InvalidVersion", "Version", "parse"] class InfinityType: @@ -40,9 +35,6 @@ def __le__(self, other: object) -> bool: def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) - def __ne__(self, other: object) -> bool: - return not isinstance(other, type(self)) - def __gt__(self, other: object) -> bool: return True @@ -72,9 +64,6 @@ def __le__(self, other: object) -> bool: def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) - def __ne__(self, other: object) -> bool: - return not isinstance(other, type(self)) - def __gt__(self, other: object) -> bool: return False @@ -88,45 +77,39 @@ def __neg__(self: object) -> InfinityType: NegativeInfinity = NegativeInfinityType() -InfiniteTypes = Union[InfinityType, NegativeInfinityType] -PrePostDevType = Union[InfiniteTypes, tuple[str, int]] -SubLocalType = Union[InfiniteTypes, int, str] -LocalType = Union[ +LocalType = tuple[Union[int, str], ...] 
+ +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, tuple[str, int]] +CmpLocalType = Union[ NegativeInfinityType, - tuple[ - Union[ - SubLocalType, - tuple[SubLocalType, str], - tuple[NegativeInfinityType, SubLocalType], - ], - ..., - ], + tuple[Union[tuple[int, str], tuple[NegativeInfinityType, Union[int, str]]], ...], ] CmpKey = tuple[ - int, tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType -] -LegacyCmpKey = tuple[int, tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool + int, + tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, ] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] -_Version = collections.namedtuple( - "_Version", ["epoch", "release", "dev", "pre", "post", "local"] -) +class _Version(NamedTuple): + epoch: int + release: tuple[int, ...] + dev: tuple[str, int] | None + pre: tuple[str, int] | None + post: tuple[str, int] | None + local: LocalType | None -def parse(version: str) -> LegacyVersion | Version: - """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. - """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + +def parse(version: str) -> Version: + return Version(version) +# The docstring is from an older version of the packaging library to avoid +# errors in the docstring validation. class InvalidVersion(ValueError): """ An invalid version was found, users should refer to PEP 440. @@ -140,7 +123,7 @@ class InvalidVersion(ValueError): class _BaseVersion: - _key: CmpKey | LegacyCmpKey + _key: tuple[Any, ...] 
def __hash__(self) -> int: return hash(self._key) @@ -185,132 +168,16 @@ def __ne__(self, other: object) -> bool: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release.", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - mapped_part = _legacy_version_replacement_map.get(part, part) - - if not mapped_part or mapped_part == ".": - continue - - if mapped_part[:1] in "0123456789": - # pad for numeric comparison - yield mapped_part.zfill(8) - else: - yield "*" + mapped_part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. 
This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: list[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch (?P[0-9]+(?:\.[0-9]+)*) # release segment (?P
                                          # pre-release
             [-_\.]?
-            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            (?Palpha|a|beta|b|preview|pre|c|rc)
             [-_\.]?
             (?P[0-9]+)?
         )?
@@ -334,9 +201,12 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:
     (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
 """
 
+VERSION_PATTERN = _VERSION_PATTERN
+
 
 class Version(_BaseVersion):
     _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
 
     def __init__(self, version: str) -> None:
         # Validate the version and parse it into pieces
@@ -377,11 +247,11 @@ def __str__(self) -> str:
             parts.append(f"{self.epoch}!")
 
         # Release segment
-        parts.append(".".join([str(x) for x in self.release]))
+        parts.append(".".join(str(x) for x in self.release))
 
         # Pre-release
         if self.pre is not None:
-            parts.append("".join([str(x) for x in self.pre]))
+            parts.append("".join(str(x) for x in self.pre))
 
         # Post-release
         if self.post is not None:
@@ -399,18 +269,15 @@ def __str__(self) -> str:
 
     @property
     def epoch(self) -> int:
-        _epoch: int = self._version.epoch
-        return _epoch
+        return self._version.epoch
 
     @property
     def release(self) -> tuple[int, ...]:
-        _release: tuple[int, ...] = self._version.release
-        return _release
+        return self._version.release
 
     @property
     def pre(self) -> tuple[str, int] | None:
-        _pre: tuple[str, int] | None = self._version.pre
-        return _pre
+        return self._version.pre
 
     @property
     def post(self) -> int | None:
@@ -423,7 +290,7 @@ def dev(self) -> int | None:
     @property
     def local(self) -> str | None:
         if self._version.local:
-            return ".".join([str(x) for x in self._version.local])
+            return ".".join(str(x) for x in self._version.local)
         else:
             return None
 
@@ -440,7 +307,7 @@ def base_version(self) -> str:
             parts.append(f"{self.epoch}!")
 
         # Release segment
-        parts.append(".".join([str(x) for x in self.release]))
+        parts.append(".".join(str(x) for x in self.release))
 
         return "".join(parts)
 
@@ -470,7 +337,7 @@ def micro(self) -> int:
 
 
 def _parse_letter_version(
-    letter: str, number: str | bytes | SupportsInt
+    letter: str | None, number: str | bytes | SupportsInt | None
 ) -> tuple[str, int] | None:
     if letter:
         # We consider there to be an implicit 0 in a pre-release if there is
@@ -507,10 +374,7 @@ def _parse_letter_version(
 _local_version_separators = re.compile(r"[\._-]")
 
 
-def _parse_local_version(local: str) -> LocalType | None:
-    """
-    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
-    """
+def _parse_local_version(local: str | None) -> LocalType | None:
     if local is not None:
         return tuple(
             part.lower() if not part.isdigit() else int(part)
@@ -525,7 +389,7 @@ def _cmpkey(
     pre: tuple[str, int] | None,
     post: tuple[str, int] | None,
     dev: tuple[str, int] | None,
-    local: tuple[SubLocalType] | None,
+    local: LocalType | None,
 ) -> CmpKey:
     # When we compare a release version, we want to compare it with all of the
     # trailing zeros removed. So we'll use a reverse the list, drop all the now
@@ -541,7 +405,7 @@ def _cmpkey(
     # if there is not a pre or a post segment. If we have one of those then
     # the normal sorting rules will handle this case correctly.
     if pre is None and post is None and dev is not None:
-        _pre: PrePostDevType = NegativeInfinity
+        _pre: CmpPrePostDevType = NegativeInfinity
     # Versions without a pre-release (except as noted above) should sort after
     # those with one.
     elif pre is None:
@@ -551,21 +415,21 @@ def _cmpkey(
 
     # Versions without a post segment should sort before those with one.
     if post is None:
-        _post: PrePostDevType = NegativeInfinity
+        _post: CmpPrePostDevType = NegativeInfinity
 
     else:
         _post = post
 
     # Versions without a development segment should sort after those with one.
     if dev is None:
-        _dev: PrePostDevType = Infinity
+        _dev: CmpPrePostDevType = Infinity
 
     else:
         _dev = dev
 
     if local is None:
         # Versions without a local segment should sort before those with one.
-        _local: LocalType = NegativeInfinity
+        _local: CmpLocalType = NegativeInfinity
     else:
         # Versions with a local segment need that segment parsed to implement
         # the sorting rules in PEP440.

From 50c30324cc5ad555e6f40174f066626c630660c6 Mon Sep 17 00:00:00 2001
From: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com>
Date: Sun, 25 Aug 2024 19:05:19 +0100
Subject: [PATCH 006/176] DOC: Enforce Numpy Docstring Validation |
 pandas.api.extensions.ExtensionArray (#59407)

* Fix pandas.api.extensions.ExtensionArray._pad_or_backfill

Add 'limit_area' parameter, return value description and 'See Also' section

* Fix pandas.api.extensions.ExtensionArray._reduce

Add return value description and 'See Also' section

* Fix pandas.api.extensions.ExtensionArray._values_for_factorize

Add 'See Also' section

* Fix pandas.api.extensions.ExtensionArray.astype

Add 'See Also' section

* Fix pandas.api.extensions.ExtensionArray.dropna

Add return value description and 'See Also' section

* Fix pandas.api.extensions.ExtensionArray.dtype

Add 'See Also' section
---
 ci/code_checks.sh          |  6 ---
 pandas/core/arrays/base.py | 77 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 25d68cdf41095..1594055f4572a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -177,12 +177,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.tzinfo GL08" \
         -i "pandas.Timestamp.value GL08" \
         -i "pandas.Timestamp.year GL08" \
-        -i "pandas.api.extensions.ExtensionArray._pad_or_backfill PR01,RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray._reduce RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray._values_for_factorize SA01" \
-        -i "pandas.api.extensions.ExtensionArray.astype SA01" \
-        -i "pandas.api.extensions.ExtensionArray.dropna RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.dtype SA01" \
         -i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray.fillna SA01" \
         -i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index a0c318409d6bb..f05d1ae18c604 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -608,6 +608,14 @@ def dtype(self) -> ExtensionDtype:
         """
         An instance of ExtensionDtype.
 
+        See Also
+        --------
+        api.extensions.ExtensionDtype : Base class for extension dtypes.
+        api.extensions.ExtensionArray : Base class for extension array types.
+        api.extensions.ExtensionArray.dtype : The dtype of an ExtensionArray.
+        Series.dtype : The dtype of a Series.
+        DataFrame.dtype : The dtype of a DataFrame.
+
         Examples
         --------
         >>> pd.array([1, 2, 3]).dtype
@@ -713,6 +721,16 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``,
             otherwise a Numpy ndarray with ``dtype`` for its dtype.
 
+        See Also
+        --------
+        Series.astype : Cast a Series to a different dtype.
+        DataFrame.astype : Cast a DataFrame to a different dtype.
+        api.extensions.ExtensionArray : Base class for ExtensionArray objects.
+        core.arrays.DatetimeArray._from_sequence : Create a DatetimeArray from a
+            sequence.
+        core.arrays.TimedeltaArray._from_sequence : Create a TimedeltaArray from
+            a sequence.
+
         Examples
         --------
         >>> arr = pd.array([1, 2, 3])
@@ -1032,6 +1050,12 @@ def _pad_or_backfill(
             maximum number of entries along the entire axis where NaNs will be
             filled.
 
+        limit_area : {'inside', 'outside'} or None, default None
+            Specifies which area to limit filling.
+            - 'inside': Limit the filling to the area within the gaps.
+            - 'outside': Limit the filling to the area outside the gaps.
+            If `None`, no limitation is applied.
+
         copy : bool, default True
             Whether to make a copy of the data before filling. If False, then
             the original should be modified and no new memory should be allocated.
@@ -1043,6 +1067,16 @@ def _pad_or_backfill(
         Returns
         -------
         Same type as self
+            The filled array with the same type as the original.
+
+        See Also
+        --------
+        Series.ffill : Forward fill missing values.
+        Series.bfill : Backward fill missing values.
+        DataFrame.ffill : Forward fill missing values in DataFrame.
+        DataFrame.bfill : Backward fill missing values in DataFrame.
+        api.types.isna : Check for missing values.
+        api.types.isnull : Check for missing values.
 
         Examples
         --------
@@ -1149,6 +1183,16 @@ def dropna(self) -> Self:
 
         Returns
         -------
+        Self
+            An ExtensionArray of the same type as the original but with all
+            NA values removed.
+
+        See Also
+        --------
+        Series.dropna : Remove missing values from a Series.
+        DataFrame.dropna : Remove missing values from a DataFrame.
+        api.extensions.ExtensionArray.isna : Check for missing values in
+            an ExtensionArray.
 
         Examples
         --------
@@ -1423,6 +1467,10 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
             `-1` and not included in `uniques`. By default,
             ``np.nan`` is used.
 
+        See Also
+        --------
+        util.hash_pandas_object : Hash the pandas object.
+
         Notes
         -----
         The values returned by this method are also used in
@@ -1988,16 +2036,43 @@ def _reduce(
 
         Returns
         -------
-        scalar
+        scalar or ndarray:
+            The result of the reduction operation. The type of the result
+            depends on `keepdims`:
+            - If `keepdims` is `False`, a scalar value is returned.
+            - If `keepdims` is `True`, the result is wrapped in a numpy array with
+            a single element.
 
         Raises
         ------
         TypeError : subclass does not define operations
 
+        See Also
+        --------
+        Series.min : Return the minimum value.
+        Series.max : Return the maximum value.
+        Series.sum : Return the sum of values.
+        Series.mean : Return the mean of values.
+        Series.median : Return the median of values.
+        Series.std : Return the standard deviation.
+        Series.var : Return the variance.
+        Series.prod : Return the product of values.
+        Series.sem : Return the standard error of the mean.
+        Series.kurt : Return the kurtosis.
+        Series.skew : Return the skewness.
+
         Examples
         --------
         >>> pd.array([1, 2, 3])._reduce("min")
         1
+        >>> pd.array([1, 2, 3])._reduce("max")
+        3
+        >>> pd.array([1, 2, 3])._reduce("sum")
+        6
+        >>> pd.array([1, 2, 3])._reduce("mean")
+        2.0
+        >>> pd.array([1, 2, 3])._reduce("median")
+        2.0
         """
         meth = getattr(self, name, None)
         if meth is None:

From 360597c349f4309364af0d5ac3bab158fd83d9fa Mon Sep 17 00:00:00 2001
From: Alex 
Date: Sun, 25 Aug 2024 18:24:30 -0400
Subject: [PATCH 007/176] DOCS: fix docstring validation errors for
 pandas.Series (#59600)

* DOCS: fix docstring validation errors for pandas.Series

* DOCS: fix underline length
---
 ci/code_checks.sh                     | 2 --
 pandas/core/arrays/arrow/accessors.py | 4 ++++
 pandas/core/series.py                 | 5 +++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 1594055f4572a..2d260c78a8f33 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -144,8 +144,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.std PR01,RT03,SA01" \
         -i "pandas.Series.str.wrap RT03,SA01" \
         -i "pandas.Series.str.zfill RT03" \
-        -i "pandas.Series.struct.dtypes SA01" \
-        -i "pandas.Series.to_markdown SA01" \
         -i "pandas.Timedelta.asm8 SA01" \
         -i "pandas.Timedelta.ceil SA01" \
         -i "pandas.Timedelta.components SA01" \
diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index aea162461d3c1..d9a80b699b0bb 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -258,6 +258,10 @@ def dtypes(self) -> Series:
         pandas.Series
             The data type of each child field.
 
+        See Also
+        --------
+        Series.dtype: Return the dtype object of the underlying data.
+
         Examples
         --------
         >>> import pyarrow as pa
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d944d1ce819b6..17494f948876a 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1617,6 +1617,11 @@ def to_markdown(
         str
             {klass} in Markdown-friendly format.
 
+        See Also
+        --------
+        Series.to_frame : Convert Series to DataFrame.
+        Series.to_latex : Render Series to LaTeX-formatted table.
+
         Notes
         -----
         Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.

From 55441d313c0d5c8e23558734bc20681c1a31378a Mon Sep 17 00:00:00 2001
From: wenchen-cai 
Date: Tue, 27 Aug 2024 00:23:26 +0800
Subject: [PATCH 008/176] DOCS: fix docstring validation errors for
 pandas.Series.str (#59597)

---
 ci/code_checks.sh               | 2 --
 pandas/core/strings/accessor.py | 8 ++++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2d260c78a8f33..916720e5a01e3 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -142,8 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.sparse.sp_values SA01" \
         -i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
         -i "pandas.Series.std PR01,RT03,SA01" \
-        -i "pandas.Series.str.wrap RT03,SA01" \
-        -i "pandas.Series.str.zfill RT03" \
         -i "pandas.Timedelta.asm8 SA01" \
         -i "pandas.Timedelta.ceil SA01" \
         -i "pandas.Timedelta.components SA01" \
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index c88270b2a2f16..bdb88e981bcda 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1853,6 +1853,7 @@ def zfill(self, width: int):
         Returns
         -------
         Series/Index of objects.
+            A Series or Index where the strings are prepended with '0' characters.
 
         See Also
         --------
@@ -2385,6 +2386,13 @@ def wrap(
         Returns
         -------
         Series or Index
+            A Series or Index where the strings are wrapped at the specified line width.
+
+        See Also
+        --------
+        Series.str.strip : Remove leading and trailing characters in Series/Index.
+        Series.str.lstrip : Remove leading characters in Series/Index.
+        Series.str.rstrip : Remove trailing characters in Series/Index.
 
         Notes
         -----

From 6fa4eb43fbf01d558c9e8cd0fdde6fa5359c9d19 Mon Sep 17 00:00:00 2001
From: Abhinav Reddy 
Date: Mon, 26 Aug 2024 12:25:02 -0400
Subject: [PATCH 009/176] DOC: Fix Numpy Docstring errors in
 pandas.api.extensions.ExtensionArray (#59605)

* fix duplicated

* fix fillna

* fix insert

* fix isin

* fix tolist

* fix unique

* fix view

---------

Co-authored-by: Abhinav Thimma 
---
 ci/code_checks.sh          |  7 -----
 pandas/core/arrays/base.py | 52 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 916720e5a01e3..4ddc429f2a51c 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -173,14 +173,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.tzinfo GL08" \
         -i "pandas.Timestamp.value GL08" \
         -i "pandas.Timestamp.year GL08" \
-        -i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.fillna SA01" \
-        -i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.view SA01" \
         -i "pandas.api.interchange.from_dataframe RT03,SA01" \
         -i "pandas.api.types.is_bool PR01,SA01" \
         -i "pandas.api.types.is_categorical_dtype SA01" \
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index f05d1ae18c604..2124f86b03b9c 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1137,6 +1137,13 @@ def fillna(
         ExtensionArray
             With NA/NaN filled.
 
+        See Also
+        --------
+        api.extensions.ExtensionArray.dropna : Return ExtensionArray without
+            NA values.
+        api.extensions.ExtensionArray.isna : A 1-D array indicating if
+            each value is missing.
+
         Examples
         --------
         >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
@@ -1220,6 +1227,15 @@ def duplicated(
         Returns
         -------
         ndarray[bool]
+            With true in indices where elements are duplicated and false otherwise.
+
+        See Also
+        --------
+        DataFrame.duplicated : Return boolean Series denoting
+            duplicate rows.
+        Series.duplicated : Indicate duplicate Series values.
+        api.extensions.ExtensionArray.unique : Compute the ExtensionArray
+            of unique values.
 
         Examples
         --------
@@ -1303,6 +1319,13 @@ def unique(self) -> Self:
         Returns
         -------
         pandas.api.extensions.ExtensionArray
+            With unique values from the input array.
+
+        See Also
+        --------
+        Index.unique: Return unique values in the index.
+        Series.unique: Return unique values of Series object.
+        unique: Return unique values based on a hash table.
 
         Examples
         --------
@@ -1436,10 +1459,18 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
         Parameters
         ----------
         values : np.ndarray or ExtensionArray
+            Values to compare every element in the array against.
 
         Returns
         -------
         np.ndarray[bool]
+            With true at indices where value is in `values`.
+
+        See Also
+        --------
+        DataFrame.isin: Whether each element in the DataFrame is contained in values.
+        Index.isin: Return a boolean array where the index values are in values.
+        Series.isin: Whether elements in Series are contained in values.
 
         Examples
         --------
@@ -1743,6 +1774,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
         ExtensionArray or np.ndarray
             A view on the :class:`ExtensionArray`'s data.
 
+        See Also
+        --------
+        api.extensions.ExtensionArray.ravel: Return a flattened view on input array.
+        Index.view: Equivalent function for Index.
+        ndarray.view: New view of array with the same data.
+
         Examples
         --------
         This gives view on the underlying data of an ``ExtensionArray`` and is not a
@@ -2201,6 +2238,12 @@ def tolist(self) -> list:
         Returns
         -------
         list
+            Python list of values in array.
+
+        See Also
+        --------
+        Index.to_list: Return a list of the values in the Index.
+        Series.to_list: Return a list of the values in the Series.
 
         Examples
         --------
@@ -2223,11 +2266,18 @@ def insert(self, loc: int, item) -> Self:
         Parameters
         ----------
         loc : int
+            Index where the `item` needs to be inserted.
         item : scalar-like
+            Value to be inserted.
 
         Returns
         -------
-        same type as self
+        ExtensionArray
+            With `item` inserted at `loc`.
+
+        See Also
+        --------
+        Index.insert: Make new Index inserting new item at location.
 
         Notes
         -----

From d31aa834cef5a433938933f75ca20f0268a4ea83 Mon Sep 17 00:00:00 2001
From: ktseng4096 <32848825+ktseng4096@users.noreply.github.com>
Date: Mon, 26 Aug 2024 11:33:43 -0700
Subject: [PATCH 010/176] DOC: add See Also section to
 groupby.DataFrameGroupBy.prod (#59599)

* Update Groupby.prod

* update code_check list

* remove extra spaces

* fix errors

* ruff formatting
---
 ci/code_checks.sh              |  2 -
 pandas/core/groupby/groupby.py | 77 ++++++++++++++++------------------
 2 files changed, 37 insertions(+), 42 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 4ddc429f2a51c..76cc02652ec24 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -226,7 +226,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-        -i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
@@ -243,7 +242,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
         -i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-        -i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
         -i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b288dad63179f..8c9c92594ebe7 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -164,32 +164,6 @@ class providing the base-class of operations.
             to each row or column of a DataFrame.
 """
 
-_groupby_agg_method_template = """
-Compute {fname} of group values.
-
-Parameters
-----------
-numeric_only : bool, default {no}
-    Include only float, int, boolean columns.
-
-    .. versionchanged:: 2.0.0
-
-        numeric_only no longer accepts ``None``.
-
-min_count : int, default {mc}
-    The required number of valid values to perform the operation. If fewer
-    than ``min_count`` non-NA values are present the result will be NA.
-
-Returns
--------
-Series or DataFrame
-    Computed {fname} of values within each group.
-
-Examples
---------
-{example}
-"""
-
 _groupby_agg_method_engine_template = """
 Compute {fname} of group values.
 
@@ -3029,16 +3003,38 @@ def sum(
             return result
 
     @final
-    @doc(
-        _groupby_agg_method_template,
-        fname="prod",
-        no=False,
-        mc=0,
-        example=dedent(
-            """\
+    def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
+        """
+        Compute prod of group values.
+
+        Parameters
+        ----------
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+
+            .. versionchanged:: 2.0.0
+
+                numeric_only no longer accepts ``None``.
+
+        min_count : int, default 0
+            The required number of valid values to perform the operation. If fewer
+            than ``min_count`` non-NA values are present the result will be NA.
+
+        Returns
+        -------
+        Series or DataFrame
+            Computed prod of values within each group.
+
+        See Also
+        --------
+        Series.prod : Return the product of the values over the requested axis.
+        DataFrame.prod : Return the product of the values over the requested axis.
+
+        Examples
+        --------
         For SeriesGroupBy:
 
-        >>> lst = ['a', 'a', 'b', 'b']
+        >>> lst = ["a", "a", "b", "b"]
         >>> ser = pd.Series([1, 2, 3, 4], index=lst)
         >>> ser
         a    1
@@ -3054,8 +3050,11 @@ def sum(
         For DataFrameGroupBy:
 
         >>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
-        >>> df = pd.DataFrame(data, columns=["a", "b", "c"],
-        ...                   index=["tiger", "leopard", "cheetah", "lion"])
+        >>> df = pd.DataFrame(
+        ...     data,
+        ...     columns=["a", "b", "c"],
+        ...     index=["tiger", "leopard", "cheetah", "lion"],
+        ... )
         >>> df
                   a  b  c
           tiger   1  8  2
@@ -3066,10 +3065,8 @@ def sum(
              b    c
         a
         1   16   10
-        2   30   72"""
-        ),
-    )
-    def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
+        2   30   72
+        """
         return self._agg_general(
             numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
         )

From bb4ab4f2c0c2806f367679b7131fb98f718a3480 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli 
Date: Mon, 26 Aug 2024 20:36:12 +0200
Subject: [PATCH 011/176] ENH: support Arrow PyCapsule Interface on Series for
 export (#59587)

* ENH: support Arrow PyCapsule Interface on Series for export

* simplify

* simplify
---
 doc/source/whatsnew/v3.0.0.rst              |  1 +
 pandas/core/series.py                       | 27 +++++++++++++++++++++
 pandas/tests/series/test_arrow_interface.py | 23 ++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 pandas/tests/series/test_arrow_interface.py

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 1533f9267ce39..eaf9ce899f03a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -43,6 +43,7 @@ Other enhancements
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
+- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
 - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
 - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 17494f948876a..4f79e30f48f3c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -34,6 +34,7 @@
 from pandas._libs.lib import is_range_indexer
 from pandas.compat import PYPY
 from pandas.compat._constants import REF_COUNT
+from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.errors import (
     ChainedAssignmentError,
@@ -558,6 +559,32 @@ def _init_dict(
 
     # ----------------------------------------------------------------------
 
+    def __arrow_c_stream__(self, requested_schema=None):
+        """
+        Export the pandas Series as an Arrow C stream PyCapsule.
+
+        This relies on pyarrow to convert the pandas Series to the Arrow
+        format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
+        in its handling of the index, i.e. to ignore it).
+        This conversion is not necessarily zero-copy.
+
+        Parameters
+        ----------
+        requested_schema : PyCapsule, default None
+            The schema to which the dataframe should be casted, passed as a
+            PyCapsule containing a C ArrowSchema representation of the
+            requested schema.
+
+        Returns
+        -------
+        PyCapsule
+        """
+        pa = import_optional_dependency("pyarrow", min_version="16.0.0")
+        ca = pa.chunked_array([pa.Array.from_pandas(self, type=requested_schema)])
+        return ca.__arrow_c_stream__(requested_schema)
+
+    # ----------------------------------------------------------------------
+
     @property
     def _constructor(self) -> type[Series]:
         return Series
diff --git a/pandas/tests/series/test_arrow_interface.py b/pandas/tests/series/test_arrow_interface.py
new file mode 100644
index 0000000000000..34a2a638e4185
--- /dev/null
+++ b/pandas/tests/series/test_arrow_interface.py
@@ -0,0 +1,23 @@
+import ctypes
+
+import pytest
+
+import pandas as pd
+
+pa = pytest.importorskip("pyarrow", minversion="16.0")
+
+
+def test_series_arrow_interface():
+    s = pd.Series([1, 4, 2])
+
+    capsule = s.__arrow_c_stream__()
+    assert (
+        ctypes.pythonapi.PyCapsule_IsValid(
+            ctypes.py_object(capsule), b"arrow_array_stream"
+        )
+        == 1
+    )
+
+    ca = pa.chunked_array(s)
+    expected = pa.chunked_array([[1, 4, 2]])
+    assert ca.equals(expected)

From 15e9e7acca996660b2e53c3421702b4f41e81fd6 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 26 Aug 2024 10:55:48 -1000
Subject: [PATCH 012/176] REF: Minimize operations in recode_for_groupby
 (#59618)

---
 pandas/core/groupby/categorical.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
index 49130d91a0126..90cd8e3ffa1c7 100644
--- a/pandas/core/groupby/categorical.py
+++ b/pandas/core/groupby/categorical.py
@@ -46,9 +46,8 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
         # In cases with c.ordered, this is equivalent to
         #  return c.remove_unused_categories(), c
 
-        unique_codes = unique1d(c.codes)  # type: ignore[no-untyped-call]
+        take_codes = unique1d(c.codes[c.codes != -1])  # type: ignore[no-untyped-call]
 
-        take_codes = unique_codes[unique_codes != -1]
         if sort:
             take_codes = np.sort(take_codes)
 
@@ -67,17 +66,18 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
 
     # sort=False should order groups in as-encountered order (GH-8868)
 
-    # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
-    all_codes = np.arange(c.categories.nunique())
+    # GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
     # GH 38140: exclude nan from indexer for categories
     unique_notnan_codes = unique1d(c.codes[c.codes != -1])  # type: ignore[no-untyped-call]
     if sort:
         unique_notnan_codes = np.sort(unique_notnan_codes)
-    if len(all_codes) > len(unique_notnan_codes):
+    if (num_cat := len(c.categories)) > len(unique_notnan_codes):
         # GH 13179: All categories need to be present, even if missing from the data
-        missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
+        missing_codes = np.setdiff1d(
+            np.arange(num_cat), unique_notnan_codes, assume_unique=True
+        )
         take_codes = np.concatenate((unique_notnan_codes, missing_codes))
     else:
         take_codes = unique_notnan_codes
 
-    return Categorical(c, c.unique().categories.take(take_codes))
+    return Categorical(c, c.categories.take(take_codes))

From 8f7080b10e2fbcdae1c230c8e659c75f2b76ae18 Mon Sep 17 00:00:00 2001
From: matiaslindgren 
Date: Mon, 26 Aug 2024 23:58:32 +0200
Subject: [PATCH 013/176] BUG: allow None as name in multi-index during join
 (#59546)

* allow None as name in multi-index

* update whatsnew

* add unit test for none label joins

* move bugfix note under Reshaping

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst          |  1 +
 pandas/core/indexes/base.py             |  4 ++--
 pandas/tests/reshape/merge/test_join.py | 26 +++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index eaf9ce899f03a..338fbc744510c 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -659,6 +659,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
+- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d39c337fbb4b2..c8dbea1fd39ea 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4516,8 +4516,8 @@ def _join_multi(self, other: Index, how: JoinHow):
         from pandas.core.reshape.merge import restore_dropped_levels_multijoin
 
         # figure out join names
-        self_names_list = list(com.not_none(*self.names))
-        other_names_list = list(com.not_none(*other.names))
+        self_names_list = list(self.names)
+        other_names_list = list(other.names)
         self_names_order = self_names_list.index
         other_names_order = other_names_list.index
         self_names = set(self_names_list)
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index f090ded06119a..0f743332acbbe 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -1098,3 +1098,29 @@ def test_join_multiindex_categorical_output_index_dtype(how, values):
 
     result = df1.join(df2, how=how)
     tm.assert_frame_equal(result, expected)
+
+
+def test_join_multiindex_with_none_as_label():
+    # GH 58721
+    df1 = DataFrame(
+        {"A": [1]},
+        index=MultiIndex.from_tuples([(3, 3)], names=["X", None]),
+    )
+    df2 = DataFrame(
+        {"B": [2]},
+        index=MultiIndex.from_tuples([(3, 3)], names=[None, "X"]),
+    )
+
+    result12 = df1.join(df2)
+    expected12 = DataFrame(
+        {"A": [1], "B": [2]},
+        index=MultiIndex.from_tuples([(3, 3)], names=["X", None]),
+    )
+    tm.assert_frame_equal(result12, expected12)
+
+    result21 = df2.join(df1)
+    expected21 = DataFrame(
+        {"B": [2], "A": [1]},
+        index=MultiIndex.from_tuples([(3, 3)], names=[None, "X"]),
+    )
+    tm.assert_frame_equal(result21, expected21)

From bd81fef7edfe835871ee6ddaead759f5a0d1affb Mon Sep 17 00:00:00 2001
From: Kevin Amparado <109636487+KevsterAmp@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:07:24 +0800
Subject: [PATCH 014/176] PERF: Performance Improvement on `DataFrame.to_csv()`
 when `index=False` (#59608)

* add alternative ix when self.nlevel is 0

* add to latest whatsnew

* change np.full to np.empty
---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 pandas/io/formats/csvs.py      | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 338fbc744510c..85a1d1ad566b4 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -505,6 +505,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
 - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
 - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
+- Performance improvement in :meth:`DataFrame.to_csv` when ``index=False`` (:issue:`59312`)
 - Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
 - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 50503e862ef43..75bcb51ef4be2 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -320,7 +320,11 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
         res = df._get_values_for_csv(**self._number_format)
         data = list(res._iter_column_arrays())
 
-        ix = self.data_index[slicer]._get_values_for_csv(**self._number_format)
+        ix = (
+            self.data_index[slicer]._get_values_for_csv(**self._number_format)
+            if self.nlevels != 0
+            else np.empty(end_i - start_i)
+        )
         libwriters.write_csv_rows(
             data,
             ix,

From 7c365796f866f7ead3fdea4ed1bf8083b096164f Mon Sep 17 00:00:00 2001
From: Harsha Lakamsani 
Date: Mon, 26 Aug 2024 17:09:21 -0700
Subject: [PATCH 015/176] DOC: fix docstring validation errors for
 pandas.io.formats.style.Styler (#59607)

* DOC: all pandas.io.formats.style.Styler docstring validation errors fixed

* DOCS: base to_excel docstring template extended for pandas.io.formats.style.Styler.to_excel
---
 ci/code_checks.sh          |  28 ----------
 pandas/core/generic.py     |   6 ++-
 pandas/io/formats/style.py | 106 +++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 29 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 76cc02652ec24..25317a08ca7b0 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -294,34 +294,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.errors.UnsupportedFunctionCall SA01" \
         -i "pandas.errors.ValueLabelTypeMismatch SA01" \
         -i "pandas.infer_freq SA01" \
-        -i "pandas.io.formats.style.Styler.apply RT03" \
-        -i "pandas.io.formats.style.Styler.apply_index RT03" \
-        -i "pandas.io.formats.style.Styler.background_gradient RT03" \
-        -i "pandas.io.formats.style.Styler.bar RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.clear SA01" \
-        -i "pandas.io.formats.style.Styler.concat RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.export RT03" \
-        -i "pandas.io.formats.style.Styler.from_custom_template SA01" \
-        -i "pandas.io.formats.style.Styler.hide RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.highlight_between RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_max RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_min RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_null RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_quantile RT03" \
-        -i "pandas.io.formats.style.Styler.map RT03" \
-        -i "pandas.io.formats.style.Styler.map_index RT03" \
-        -i "pandas.io.formats.style.Styler.set_caption RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_properties RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_sticky RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_table_attributes PR07,RT03" \
-        -i "pandas.io.formats.style.Styler.set_table_styles RT03" \
-        -i "pandas.io.formats.style.Styler.set_td_classes RT03" \
-        -i "pandas.io.formats.style.Styler.set_tooltips RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_uuid PR07,RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.text_gradient RT03" \
-        -i "pandas.io.formats.style.Styler.to_excel PR01" \
-        -i "pandas.io.formats.style.Styler.to_string SA01" \
-        -i "pandas.io.formats.style.Styler.use RT03" \
         -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
         -i "pandas.io.stata.StataReader.data_label SA01" \
         -i "pandas.io.stata.StataReader.value_labels RT03,SA01" \
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 61fa5c49a8c5b..eae3249aa79a4 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2123,11 +2123,13 @@ def _repr_data_resource_(self):
         klass="object",
         storage_options=_shared_docs["storage_options"],
         storage_options_versionadded="1.2.0",
+        encoding_parameter="",
+        verbose_parameter="",
         extra_parameters=textwrap.dedent(
             """\
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
-    """
+        """
         ),
     )
     def to_excel(
@@ -2196,9 +2198,11 @@ def to_excel(
 
         merge_cells : bool, default True
             Write MultiIndex and Hierarchical Rows as merged cells.
+        {encoding_parameter}
         inf_rep : str, default 'inf'
             Representation for infinity (there is no native representation for
             infinity in Excel).
+        {verbose_parameter}
         freeze_panes : tuple of int (length 2), optional
             Specifies the one-based bottommost row and rightmost column that
             is to be frozen.
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 6f4c2fa6c6eae..82bc0301fed3a 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -7,6 +7,7 @@
 import copy
 from functools import partial
 import operator
+import textwrap
 from typing import (
     TYPE_CHECKING,
     overload,
@@ -306,6 +307,12 @@ def concat(self, other: Styler) -> Styler:
         Returns
         -------
         Styler
+            Instance of class with specified Styler appended.
+
+        See Also
+        --------
+        Styler.clear : Reset the ``Styler``, removing any previously applied styles.
+        Styler.export : Export the styles applied to the current Styler.
 
         Notes
         -----
@@ -447,6 +454,15 @@ def set_tooltips(
         Returns
         -------
         Styler
+            Instance of class with DataFrame set for strings on ``Styler``
+                generating ``:hover`` tooltips.
+
+        See Also
+        --------
+        Styler.set_table_attributes : Set the table attributes added to the
+            ``<table>`` HTML element.
+        Styler.set_table_styles : Set the table styles included within the
+            ``