From 631ab35ecd469c543ce740ad493dee44e167eabc Mon Sep 17 00:00:00 2001 From: Harsha Lakamsani Date: Sun, 25 Aug 2024 10:52:12 -0700 Subject: [PATCH 001/176] DOCS: fix docstring validation errors for pandas.Series (#59602) * DOCS: pandas.Series.prod + pandas.Series.product RT03 docstring validation error fixed * DOCS: pandas.Series.pop SA01 + pandas.Series.reorder_levels RT03/SA01 docstring validation error fixed * DOCS: pandas.Series.list.__getitem__ + pandas.Series.list.flatten + pandas.Series.list.len SA01 docstring validation error fixed * DOCS: pandas.Series.sparse.density SA01 docstring validation error fixed * DOCS: pandas.Series.gt + pandas.Series.lt + pandas.Series.ne SA01 docstring validation error fixed * linting issues leftover from docstring validation fixes resolved --- ci/code_checks.sh | 11 ----------- pandas/core/arrays/arrow/accessors.py | 14 ++++++++++++++ pandas/core/arrays/sparse/array.py | 5 +++++ pandas/core/generic.py | 2 ++ pandas/core/ops/docstrings.py | 6 +++--- pandas/core/series.py | 13 ++++++++++++- 6 files changed, 36 insertions(+), 15 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index e9f4ee1f391a2..0cb2df7bb334b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -133,20 +133,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.dt.tz_convert PR01,PR02" \ -i "pandas.Series.dt.tz_localize PR01,PR02" \ -i "pandas.Series.dt.unit GL08" \ - -i "pandas.Series.gt SA01" \ - -i "pandas.Series.list.__getitem__ SA01" \ - -i "pandas.Series.list.flatten SA01" \ - -i "pandas.Series.list.len SA01" \ - -i "pandas.Series.lt SA01" \ - -i "pandas.Series.ne SA01" \ -i "pandas.Series.pad PR01,SA01" \ - -i "pandas.Series.pop SA01" \ - -i "pandas.Series.prod RT03" \ - -i "pandas.Series.product RT03" \ - -i "pandas.Series.reorder_levels RT03,SA01" \ -i "pandas.Series.sem PR01,RT03,SA01" \ -i "pandas.Series.sparse PR01,SA01" \ - -i "pandas.Series.sparse.density SA01" \ -i "pandas.Series.sparse.fill_value SA01" 
\ -i "pandas.Series.sparse.from_coo PR07,SA01" \ -i "pandas.Series.sparse.npoints SA01" \ diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index d8f948a37d206..aea162461d3c1 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -92,6 +92,12 @@ def len(self) -> Series: pandas.Series The length of each list. + See Also + -------- + str.len : Python built-in function returning the length of an object. + Series.size : Returns the length of the Series. + StringMethods.len : Compute the length of each element in the Series/Index. + Examples -------- >>> import pyarrow as pa @@ -128,6 +134,10 @@ def __getitem__(self, key: int | slice) -> Series: pandas.Series The list at requested index. + See Also + -------- + ListAccessor.flatten : Flatten list values. + Examples -------- >>> import pyarrow as pa @@ -187,6 +197,10 @@ def flatten(self) -> Series: pandas.Series The data from all lists in the series flattened. + See Also + -------- + ListAccessor.__getitem__ : Index or slice values in the Series. + Examples -------- >>> import pyarrow as pa diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 3a08344369822..a09dc20af3b36 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -671,6 +671,11 @@ def density(self) -> float: """ The percent of non- ``fill_value`` points, as decimal. + See Also + -------- + DataFrame.sparse.from_spmatrix : Create a new DataFrame from a + scipy sparse matrix. 
+ Examples -------- >>> from pandas.arrays import SparseArray diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdc8642c9c70e..61fa5c49a8c5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11815,6 +11815,8 @@ def last_valid_index(self) -> Hashable: Returns ------- {name1} or scalar\ + + Value containing the calculation referenced in the description.\ {see_also}\ {examples} """ diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 0ad6db0aefe9c..5ce0a2da86f31 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -376,7 +376,7 @@ def make_flex_doc(op_name: str, typ: str) -> str: "ne": { "op": "!=", "desc": "Not equal to", - "reverse": None, + "reverse": "eq", "series_examples": _ne_example_SERIES, "series_returns": _returns_series, }, @@ -397,14 +397,14 @@ def make_flex_doc(op_name: str, typ: str) -> str: "gt": { "op": ">", "desc": "Greater than", - "reverse": None, + "reverse": "lt", "series_examples": _gt_example_SERIES, "series_returns": _returns_series, }, "ge": { "op": ">=", "desc": "Greater than or equal to", - "reverse": None, + "reverse": "le", "series_examples": _ge_example_SERIES, "series_returns": _returns_series, }, diff --git a/pandas/core/series.py b/pandas/core/series.py index 5c35c6c0d6d23..ed27984526fa5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4093,7 +4093,13 @@ def reorder_levels(self, order: Sequence[Level]) -> Series: Returns ------- - type of caller (new object) + Series + Type of caller with index as MultiIndex (new object). + + See Also + -------- + DataFrame.reorder_levels : Rearrange index or column levels using + input ``order``. Examples -------- @@ -5048,6 +5054,11 @@ def pop(self, item: Hashable) -> Any: scalar Value that is popped from series. + See Also + -------- + Series.drop: Drop specified values from Series. + Series.drop_duplicates: Return Series with duplicate values removed. 
+ Examples -------- >>> ser = pd.Series([1, 2, 3]) From dca2635d0ab246b45c5edf6575e2d5fc20751023 Mon Sep 17 00:00:00 2001 From: Florian Bourgey Date: Sun, 25 Aug 2024 13:53:53 -0400 Subject: [PATCH 002/176] Add example same correlation in pandas.Series.corr documentation (#59591) --- pandas/core/series.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index ed27984526fa5..d944d1ce819b6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2619,6 +2619,13 @@ def corr( >>> s2 = pd.Series([1, 2, 3], index=[2, 1, 0]) >>> s1.corr(s2) -1.0 + + If the input is a constant array, the correlation is not defined in this case, + and ``np.nan`` is returned. + + >>> s1 = pd.Series([0.45, 0.45]) + >>> s1.corr(s1) + nan """ # noqa: E501 this, other = self.align(other, join="inner") if len(this) == 0: From 2130a99d1f3ffaf871bea5c40f1aa5ef59659687 Mon Sep 17 00:00:00 2001 From: Ankit Dhokariya <67553771+ankit-dhokariya@users.noreply.github.com> Date: Sun, 25 Aug 2024 10:56:10 -0700 Subject: [PATCH 003/176] DOC: Enforce Numpy Docstring Validation (Issue #59458) (#59590) * adding docstring for pandas.Timestamp.day property * fixing type annotation --- ci/code_checks.sh | 1 - pandas/_libs/tslibs/timestamps.pyx | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 0cb2df7bb334b..bffac19b1b128 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -168,7 +168,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.TimedeltaIndex.nanoseconds SA01" \ -i "pandas.TimedeltaIndex.seconds SA01" \ -i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \ - -i "pandas.Timestamp.day GL08" \ -i "pandas.Timestamp.fold GL08" \ -i "pandas.Timestamp.hour GL08" \ -i "pandas.Timestamp.max PR02" \ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3268207b667f2..a9463ce8ad044 100644 --- 
a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -961,6 +961,29 @@ cdef class _Timestamp(ABCTimestamp): """ return ((self.month - 1) // 3) + 1 + @property + def day(self) -> int: + """ + Return the day of the Timestamp. + + Returns + ------- + int + The day of the Timestamp. + + See Also + -------- + Timestamp.week : Return the week number of the year. + Timestamp.weekday : Return the day of the week. + + Examples + -------- + >>> ts = pd.Timestamp("2024-08-31 16:16:30") + >>> ts.day + 31 + """ + return super().day + @property def week(self) -> int: """ From fe42b3b234f6b513da68f98b648d60d9f9e66e30 Mon Sep 17 00:00:00 2001 From: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com> Date: Sun, 25 Aug 2024 18:57:31 +0100 Subject: [PATCH 004/176] DOCS: fix docstring validation errors for pandas.Series (#59596) Fixes: -i "pandas.Series.str.match RT03" \ -i "pandas.Series.str.normalize RT03,SA01" \ -i "pandas.Series.str.repeat SA01" \ -i "pandas.Series.str.replace SA01" \ --- ci/code_checks.sh | 4 ---- pandas/core/strings/accessor.py | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index bffac19b1b128..25d68cdf41095 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -142,10 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.sparse.sp_values SA01" \ -i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \ -i "pandas.Series.std PR01,RT03,SA01" \ - -i "pandas.Series.str.match RT03" \ - -i "pandas.Series.str.normalize RT03,SA01" \ - -i "pandas.Series.str.repeat SA01" \ - -i "pandas.Series.str.replace SA01" \ -i "pandas.Series.str.wrap RT03,SA01" \ -i "pandas.Series.str.zfill RT03" \ -i "pandas.Series.struct.dtypes SA01" \ diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 1014c9559afaf..c88270b2a2f16 100644 --- a/pandas/core/strings/accessor.py +++ 
b/pandas/core/strings/accessor.py @@ -1379,6 +1379,9 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=None): Returns ------- Series/Index/array of boolean values + A Series, Index, or array of boolean values indicating whether the start + of each string matches the pattern. The result will be of the same type + as the input. See Also -------- @@ -1503,6 +1506,14 @@ def replace( * if `pat` is a compiled regex and `case` or `flags` is set * if `pat` is a dictionary and `repl` is not None. + See Also + -------- + Series.str.replace : Method to replace occurrences of a substring with another + substring. + Series.str.extract : Extract substrings using a regular expression. + Series.str.findall : Find all occurrences of a pattern or regex in each string. + Series.str.split : Split each string by a specified delimiter or pattern. + Notes ----- When `pat` is a compiled regex, all flags should be included in the @@ -1634,6 +1645,20 @@ def repeat(self, repeats): Series or Index of repeated string objects specified by input parameter repeats. + See Also + -------- + Series.str.lower : Convert all characters in each string to lowercase. + Series.str.upper : Convert all characters in each string to uppercase. + Series.str.title : Convert each string to title case (capitalizing the first + letter of each word). + Series.str.strip : Remove leading and trailing whitespace from each string. + Series.str.replace : Replace occurrences of a substring with another substring + in each string. + Series.str.ljust : Left-justify each string in the Series/Index by padding with + a specified character. + Series.str.rjust : Right-justify each string in the Series/Index by padding with + a specified character. + Examples -------- >>> s = pd.Series(["a", "b", "c"]) @@ -3091,6 +3116,19 @@ def normalize(self, form): Returns ------- Series/Index of objects + A Series or Index of strings in the same Unicode form specified by `form`. 
+ The returned object retains the same type as the input (Series or Index), + and contains the normalized strings. + + See Also + -------- + Series.str.upper : Convert all characters in each string to uppercase. + Series.str.lower : Convert all characters in each string to lowercase. + Series.str.title : Convert each string to title case (capitalizing the + first letter of each word). + Series.str.strip : Remove leading and trailing whitespace from each string. + Series.str.replace : Replace occurrences of a substring with another substring + in each string. Examples -------- From 90e8e04c2d305624d2d0b68087e65b6aace7ef1f Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sun, 25 Aug 2024 10:59:50 -0700 Subject: [PATCH 005/176] MAINT: update vendored version util from packaging (#59558) * MAINT: update vendored version util from packaging * fix docstring * fix docstring * skip docstring validation * ignore * fix validation ignore * remove docstring * rollback * add comments --- pandas/util/version/__init__.py | 238 +++++++------------------------- 1 file changed, 51 insertions(+), 187 deletions(-) diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 9838e371f0d00..b5d975a0db1d8 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -1,27 +1,22 @@ -# Vendored from https://github.com/pypa/packaging/blob/main/packaging/_structures.py -# and https://github.com/pypa/packaging/blob/main/packaging/_structures.py -# changeset ae891fd74d6dd4c6063bb04f2faeadaac6fc6313 -# 04/30/2021 +# Vendored from https://github.com/pypa/packaging/blob/main/src/packaging/_structures.py +# and https://github.com/pypa/packaging/blob/main/src/packaging/version.py +# changeset 24e5350b2ff3c5c7a36676c2af5f2cb39fd1baf8 # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. 
Licence at LICENSES/PACKAGING_LICENSE from __future__ import annotations -import collections -from collections.abc import ( - Callable, - Iterator, -) +from collections.abc import Callable import itertools import re from typing import ( + Any, + NamedTuple, SupportsInt, - Tuple, Union, ) -import warnings -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "InvalidVersion", "Version", "parse"] class InfinityType: @@ -40,9 +35,6 @@ def __le__(self, other: object) -> bool: def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) - def __ne__(self, other: object) -> bool: - return not isinstance(other, type(self)) - def __gt__(self, other: object) -> bool: return True @@ -72,9 +64,6 @@ def __le__(self, other: object) -> bool: def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) - def __ne__(self, other: object) -> bool: - return not isinstance(other, type(self)) - def __gt__(self, other: object) -> bool: return False @@ -88,45 +77,39 @@ def __neg__(self: object) -> InfinityType: NegativeInfinity = NegativeInfinityType() -InfiniteTypes = Union[InfinityType, NegativeInfinityType] -PrePostDevType = Union[InfiniteTypes, tuple[str, int]] -SubLocalType = Union[InfiniteTypes, int, str] -LocalType = Union[ +LocalType = tuple[Union[int, str], ...] 
+ +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, tuple[str, int]] +CmpLocalType = Union[ NegativeInfinityType, - tuple[ - Union[ - SubLocalType, - tuple[SubLocalType, str], - tuple[NegativeInfinityType, SubLocalType], - ], - ..., - ], + tuple[Union[tuple[int, str], tuple[NegativeInfinityType, Union[int, str]]], ...], ] CmpKey = tuple[ - int, tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType -] -LegacyCmpKey = tuple[int, tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool + int, + tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, ] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] -_Version = collections.namedtuple( - "_Version", ["epoch", "release", "dev", "pre", "post", "local"] -) +class _Version(NamedTuple): + epoch: int + release: tuple[int, ...] + dev: tuple[str, int] | None + pre: tuple[str, int] | None + post: tuple[str, int] | None + local: LocalType | None -def parse(version: str) -> LegacyVersion | Version: - """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. - """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + +def parse(version: str) -> Version: + return Version(version) +# The docstring is from an older version of the packaging library to avoid +# errors in the docstring validation. class InvalidVersion(ValueError): """ An invalid version was found, users should refer to PEP 440. @@ -140,7 +123,7 @@ class InvalidVersion(ValueError): class _BaseVersion: - _key: CmpKey | LegacyCmpKey + _key: tuple[Any, ...] 
def __hash__(self) -> int: return hash(self._key) @@ -185,132 +168,16 @@ def __ne__(self, other: object) -> bool: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release.", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - mapped_part = _legacy_version_replacement_map.get(part, part) - - if not mapped_part or mapped_part == ".": - continue - - if mapped_part[:1] in "0123456789": - # pad for numeric comparison - yield mapped_part.zfill(8) - else: - yield "*" + mapped_part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. 
This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: list[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch (?P[0-9]+(?:\.[0-9]+)*) # release segment (?P
                                          # pre-release
             [-_\.]?
-            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            (?Palpha|a|beta|b|preview|pre|c|rc)
             [-_\.]?
             (?P[0-9]+)?
         )?
@@ -334,9 +201,12 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:
     (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
 """
 
+VERSION_PATTERN = _VERSION_PATTERN
+
 
 class Version(_BaseVersion):
     _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
 
     def __init__(self, version: str) -> None:
         # Validate the version and parse it into pieces
@@ -377,11 +247,11 @@ def __str__(self) -> str:
             parts.append(f"{self.epoch}!")
 
         # Release segment
-        parts.append(".".join([str(x) for x in self.release]))
+        parts.append(".".join(str(x) for x in self.release))
 
         # Pre-release
         if self.pre is not None:
-            parts.append("".join([str(x) for x in self.pre]))
+            parts.append("".join(str(x) for x in self.pre))
 
         # Post-release
         if self.post is not None:
@@ -399,18 +269,15 @@ def __str__(self) -> str:
 
     @property
     def epoch(self) -> int:
-        _epoch: int = self._version.epoch
-        return _epoch
+        return self._version.epoch
 
     @property
     def release(self) -> tuple[int, ...]:
-        _release: tuple[int, ...] = self._version.release
-        return _release
+        return self._version.release
 
     @property
     def pre(self) -> tuple[str, int] | None:
-        _pre: tuple[str, int] | None = self._version.pre
-        return _pre
+        return self._version.pre
 
     @property
     def post(self) -> int | None:
@@ -423,7 +290,7 @@ def dev(self) -> int | None:
     @property
     def local(self) -> str | None:
         if self._version.local:
-            return ".".join([str(x) for x in self._version.local])
+            return ".".join(str(x) for x in self._version.local)
         else:
             return None
 
@@ -440,7 +307,7 @@ def base_version(self) -> str:
             parts.append(f"{self.epoch}!")
 
         # Release segment
-        parts.append(".".join([str(x) for x in self.release]))
+        parts.append(".".join(str(x) for x in self.release))
 
         return "".join(parts)
 
@@ -470,7 +337,7 @@ def micro(self) -> int:
 
 
 def _parse_letter_version(
-    letter: str, number: str | bytes | SupportsInt
+    letter: str | None, number: str | bytes | SupportsInt | None
 ) -> tuple[str, int] | None:
     if letter:
         # We consider there to be an implicit 0 in a pre-release if there is
@@ -507,10 +374,7 @@ def _parse_letter_version(
 _local_version_separators = re.compile(r"[\._-]")
 
 
-def _parse_local_version(local: str) -> LocalType | None:
-    """
-    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
-    """
+def _parse_local_version(local: str | None) -> LocalType | None:
     if local is not None:
         return tuple(
             part.lower() if not part.isdigit() else int(part)
@@ -525,7 +389,7 @@ def _cmpkey(
     pre: tuple[str, int] | None,
     post: tuple[str, int] | None,
     dev: tuple[str, int] | None,
-    local: tuple[SubLocalType] | None,
+    local: LocalType | None,
 ) -> CmpKey:
     # When we compare a release version, we want to compare it with all of the
     # trailing zeros removed. So we'll use a reverse the list, drop all the now
@@ -541,7 +405,7 @@ def _cmpkey(
     # if there is not a pre or a post segment. If we have one of those then
     # the normal sorting rules will handle this case correctly.
     if pre is None and post is None and dev is not None:
-        _pre: PrePostDevType = NegativeInfinity
+        _pre: CmpPrePostDevType = NegativeInfinity
     # Versions without a pre-release (except as noted above) should sort after
     # those with one.
     elif pre is None:
@@ -551,21 +415,21 @@ def _cmpkey(
 
     # Versions without a post segment should sort before those with one.
     if post is None:
-        _post: PrePostDevType = NegativeInfinity
+        _post: CmpPrePostDevType = NegativeInfinity
 
     else:
         _post = post
 
     # Versions without a development segment should sort after those with one.
     if dev is None:
-        _dev: PrePostDevType = Infinity
+        _dev: CmpPrePostDevType = Infinity
 
     else:
         _dev = dev
 
     if local is None:
         # Versions without a local segment should sort before those with one.
-        _local: LocalType = NegativeInfinity
+        _local: CmpLocalType = NegativeInfinity
     else:
         # Versions with a local segment need that segment parsed to implement
         # the sorting rules in PEP440.

From 50c30324cc5ad555e6f40174f066626c630660c6 Mon Sep 17 00:00:00 2001
From: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com>
Date: Sun, 25 Aug 2024 19:05:19 +0100
Subject: [PATCH 006/176] DOC: Enforce Numpy Docstring Validation |
 pandas.api.extensions.ExtensionArray (#59407)

* Fix pandas.api.extensions.ExtensionArray._pad_or_backfill

Add 'limit_area' parameter, return value description and 'See Also' section

* Fix pandas.api.extensions.ExtensionArray._reduce

Add return value description and 'See Also' section

* Fix pandas.api.extensions.ExtensionArray._values_for_factorize

Add 'See Also' section

* Fix pandas.api.extensions.ExtensionArray.astype

Add 'See Also' section

* Fix pandas.api.extensions.ExtensionArray.dropna

Add return value description and 'See Also' section

* Fix pandas.api.extensions.ExtensionArray.dtype

Add 'See Also' section
---
 ci/code_checks.sh          |  6 ---
 pandas/core/arrays/base.py | 77 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 25d68cdf41095..1594055f4572a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -177,12 +177,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.tzinfo GL08" \
         -i "pandas.Timestamp.value GL08" \
         -i "pandas.Timestamp.year GL08" \
-        -i "pandas.api.extensions.ExtensionArray._pad_or_backfill PR01,RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray._reduce RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray._values_for_factorize SA01" \
-        -i "pandas.api.extensions.ExtensionArray.astype SA01" \
-        -i "pandas.api.extensions.ExtensionArray.dropna RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.dtype SA01" \
         -i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray.fillna SA01" \
         -i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index a0c318409d6bb..f05d1ae18c604 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -608,6 +608,14 @@ def dtype(self) -> ExtensionDtype:
         """
         An instance of ExtensionDtype.
 
+        See Also
+        --------
+        api.extensions.ExtensionDtype : Base class for extension dtypes.
+        api.extensions.ExtensionArray : Base class for extension array types.
+        api.extensions.ExtensionArray.dtype : The dtype of an ExtensionArray.
+        Series.dtype : The dtype of a Series.
+        DataFrame.dtype : The dtype of a DataFrame.
+
         Examples
         --------
         >>> pd.array([1, 2, 3]).dtype
@@ -713,6 +721,16 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``,
             otherwise a Numpy ndarray with ``dtype`` for its dtype.
 
+        See Also
+        --------
+        Series.astype : Cast a Series to a different dtype.
+        DataFrame.astype : Cast a DataFrame to a different dtype.
+        api.extensions.ExtensionArray : Base class for ExtensionArray objects.
+        core.arrays.DatetimeArray._from_sequence : Create a DatetimeArray from a
+            sequence.
+        core.arrays.TimedeltaArray._from_sequence : Create a TimedeltaArray from
+            a sequence.
+
         Examples
         --------
         >>> arr = pd.array([1, 2, 3])
@@ -1032,6 +1050,12 @@ def _pad_or_backfill(
             maximum number of entries along the entire axis where NaNs will be
             filled.
 
+        limit_area : {'inside', 'outside'} or None, default None
+            Specifies which area to limit filling.
+            - 'inside': Limit the filling to the area within the gaps.
+            - 'outside': Limit the filling to the area outside the gaps.
+            If `None`, no limitation is applied.
+
         copy : bool, default True
             Whether to make a copy of the data before filling. If False, then
             the original should be modified and no new memory should be allocated.
@@ -1043,6 +1067,16 @@ def _pad_or_backfill(
         Returns
         -------
         Same type as self
+            The filled array with the same type as the original.
+
+        See Also
+        --------
+        Series.ffill : Forward fill missing values.
+        Series.bfill : Backward fill missing values.
+        DataFrame.ffill : Forward fill missing values in DataFrame.
+        DataFrame.bfill : Backward fill missing values in DataFrame.
+        api.types.isna : Check for missing values.
+        api.types.isnull : Check for missing values.
 
         Examples
         --------
@@ -1149,6 +1183,16 @@ def dropna(self) -> Self:
 
         Returns
         -------
+        Self
+            An ExtensionArray of the same type as the original but with all
+            NA values removed.
+
+        See Also
+        --------
+        Series.dropna : Remove missing values from a Series.
+        DataFrame.dropna : Remove missing values from a DataFrame.
+        api.extensions.ExtensionArray.isna : Check for missing values in
+            an ExtensionArray.
 
         Examples
         --------
@@ -1423,6 +1467,10 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
             `-1` and not included in `uniques`. By default,
             ``np.nan`` is used.
 
+        See Also
+        --------
+        util.hash_pandas_object : Hash the pandas object.
+
         Notes
         -----
         The values returned by this method are also used in
@@ -1988,16 +2036,43 @@ def _reduce(
 
         Returns
         -------
-        scalar
+        scalar or ndarray:
+            The result of the reduction operation. The type of the result
+            depends on `keepdims`:
+            - If `keepdims` is `False`, a scalar value is returned.
+            - If `keepdims` is `True`, the result is wrapped in a numpy array with
+            a single element.
 
         Raises
         ------
         TypeError : subclass does not define operations
 
+        See Also
+        --------
+        Series.min : Return the minimum value.
+        Series.max : Return the maximum value.
+        Series.sum : Return the sum of values.
+        Series.mean : Return the mean of values.
+        Series.median : Return the median of values.
+        Series.std : Return the standard deviation.
+        Series.var : Return the variance.
+        Series.prod : Return the product of values.
+        Series.sem : Return the standard error of the mean.
+        Series.kurt : Return the kurtosis.
+        Series.skew : Return the skewness.
+
         Examples
         --------
         >>> pd.array([1, 2, 3])._reduce("min")
         1
+        >>> pd.array([1, 2, 3])._reduce("max")
+        3
+        >>> pd.array([1, 2, 3])._reduce("sum")
+        6
+        >>> pd.array([1, 2, 3])._reduce("mean")
+        2.0
+        >>> pd.array([1, 2, 3])._reduce("median")
+        2.0
         """
         meth = getattr(self, name, None)
         if meth is None:

From 360597c349f4309364af0d5ac3bab158fd83d9fa Mon Sep 17 00:00:00 2001
From: Alex 
Date: Sun, 25 Aug 2024 18:24:30 -0400
Subject: [PATCH 007/176] DOCS: fix docstring validation errors for
 pandas.Series (#59600)

* DOCS: fix docstring validation errors for pandas.Series

* DOCS: fix underline length
---
 ci/code_checks.sh                     | 2 --
 pandas/core/arrays/arrow/accessors.py | 4 ++++
 pandas/core/series.py                 | 5 +++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 1594055f4572a..2d260c78a8f33 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -144,8 +144,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.std PR01,RT03,SA01" \
         -i "pandas.Series.str.wrap RT03,SA01" \
         -i "pandas.Series.str.zfill RT03" \
-        -i "pandas.Series.struct.dtypes SA01" \
-        -i "pandas.Series.to_markdown SA01" \
         -i "pandas.Timedelta.asm8 SA01" \
         -i "pandas.Timedelta.ceil SA01" \
         -i "pandas.Timedelta.components SA01" \
diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index aea162461d3c1..d9a80b699b0bb 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -258,6 +258,10 @@ def dtypes(self) -> Series:
         pandas.Series
             The data type of each child field.
 
+        See Also
+        --------
+        Series.dtype: Return the dtype object of the underlying data.
+
         Examples
         --------
         >>> import pyarrow as pa
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d944d1ce819b6..17494f948876a 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1617,6 +1617,11 @@ def to_markdown(
         str
             {klass} in Markdown-friendly format.
 
+        See Also
+        --------
+        Series.to_frame : Convert Series to DataFrame.
+        Series.to_latex : Render Series to LaTeX-formatted table.
+
         Notes
         -----
         Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.

From 55441d313c0d5c8e23558734bc20681c1a31378a Mon Sep 17 00:00:00 2001
From: wenchen-cai 
Date: Tue, 27 Aug 2024 00:23:26 +0800
Subject: [PATCH 008/176] DOCS: fix docstring validation errors for
 pandas.Series.str (#59597)

---
 ci/code_checks.sh               | 2 --
 pandas/core/strings/accessor.py | 8 ++++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2d260c78a8f33..916720e5a01e3 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -142,8 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.sparse.sp_values SA01" \
         -i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
         -i "pandas.Series.std PR01,RT03,SA01" \
-        -i "pandas.Series.str.wrap RT03,SA01" \
-        -i "pandas.Series.str.zfill RT03" \
         -i "pandas.Timedelta.asm8 SA01" \
         -i "pandas.Timedelta.ceil SA01" \
         -i "pandas.Timedelta.components SA01" \
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index c88270b2a2f16..bdb88e981bcda 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1853,6 +1853,7 @@ def zfill(self, width: int):
         Returns
         -------
         Series/Index of objects.
+            A Series or Index where the strings are prepended with '0' characters.
 
         See Also
         --------
@@ -2385,6 +2386,13 @@ def wrap(
         Returns
         -------
         Series or Index
+            A Series or Index where the strings are wrapped at the specified line width.
+
+        See Also
+        --------
+        Series.str.strip : Remove leading and trailing characters in Series/Index.
+        Series.str.lstrip : Remove leading characters in Series/Index.
+        Series.str.rstrip : Remove trailing characters in Series/Index.
 
         Notes
         -----

From 6fa4eb43fbf01d558c9e8cd0fdde6fa5359c9d19 Mon Sep 17 00:00:00 2001
From: Abhinav Reddy 
Date: Mon, 26 Aug 2024 12:25:02 -0400
Subject: [PATCH 009/176] DOC: Fix Numpy Docstring errors in
 pandas.api.extensions.ExtensionArray (#59605)

* fix duplicated

* fix fillna

* fix insert

* fix isin

* fix tolist

* fix unique

* fix view

---------

Co-authored-by: Abhinav Thimma 
---
 ci/code_checks.sh          |  7 -----
 pandas/core/arrays/base.py | 52 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 916720e5a01e3..4ddc429f2a51c 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -173,14 +173,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.tzinfo GL08" \
         -i "pandas.Timestamp.value GL08" \
         -i "pandas.Timestamp.year GL08" \
-        -i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.fillna SA01" \
-        -i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
-        -i "pandas.api.extensions.ExtensionArray.view SA01" \
         -i "pandas.api.interchange.from_dataframe RT03,SA01" \
         -i "pandas.api.types.is_bool PR01,SA01" \
         -i "pandas.api.types.is_categorical_dtype SA01" \
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index f05d1ae18c604..2124f86b03b9c 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1137,6 +1137,13 @@ def fillna(
         ExtensionArray
             With NA/NaN filled.
 
+        See Also
+        --------
+        api.extensions.ExtensionArray.dropna : Return ExtensionArray without
+            NA values.
+        api.extensions.ExtensionArray.isna : A 1-D array indicating if
+            each value is missing.
+
         Examples
         --------
         >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
@@ -1220,6 +1227,15 @@ def duplicated(
         Returns
         -------
         ndarray[bool]
+            With true in indices where elements are duplicated and false otherwise.
+
+        See Also
+        --------
+        DataFrame.duplicated : Return boolean Series denoting
+            duplicate rows.
+        Series.duplicated : Indicate duplicate Series values.
+        api.extensions.ExtensionArray.unique : Compute the ExtensionArray
+            of unique values.
 
         Examples
         --------
@@ -1303,6 +1319,13 @@ def unique(self) -> Self:
         Returns
         -------
         pandas.api.extensions.ExtensionArray
+            With unique values from the input array.
+
+        See Also
+        --------
+        Index.unique: Return unique values in the index.
+        Series.unique: Return unique values of Series object.
+        unique: Return unique values based on a hash table.
 
         Examples
         --------
@@ -1436,10 +1459,18 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
         Parameters
         ----------
         values : np.ndarray or ExtensionArray
+            Values to compare every element in the array against.
 
         Returns
         -------
         np.ndarray[bool]
+            With true at indices where value is in `values`.
+
+        See Also
+        --------
+        DataFrame.isin: Whether each element in the DataFrame is contained in values.
+        Index.isin: Return a boolean array where the index values are in values.
+        Series.isin: Whether elements in Series are contained in values.
 
         Examples
         --------
@@ -1743,6 +1774,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
         ExtensionArray or np.ndarray
             A view on the :class:`ExtensionArray`'s data.
 
+        See Also
+        --------
+        api.extensions.ExtensionArray.ravel: Return a flattened view on input array.
+        Index.view: Equivalent function for Index.
+        ndarray.view: New view of array with the same data.
+
         Examples
         --------
         This gives view on the underlying data of an ``ExtensionArray`` and is not a
@@ -2201,6 +2238,12 @@ def tolist(self) -> list:
         Returns
         -------
         list
+            Python list of values in array.
+
+        See Also
+        --------
+        Index.to_list: Return a list of the values in the Index.
+        Series.to_list: Return a list of the values in the Series.
 
         Examples
         --------
@@ -2223,11 +2266,18 @@ def insert(self, loc: int, item) -> Self:
         Parameters
         ----------
         loc : int
+            Index where the `item` needs to be inserted.
         item : scalar-like
+            Value to be inserted.
 
         Returns
         -------
-        same type as self
+        ExtensionArray
+            With `item` inserted at `loc`.
+
+        See Also
+        --------
+        Index.insert: Make new Index inserting new item at location.
 
         Notes
         -----

From d31aa834cef5a433938933f75ca20f0268a4ea83 Mon Sep 17 00:00:00 2001
From: ktseng4096 <32848825+ktseng4096@users.noreply.github.com>
Date: Mon, 26 Aug 2024 11:33:43 -0700
Subject: [PATCH 010/176] DOC: add See Also section to
 groupby.DataFrameGroupBy.prod (#59599)

* Update Groupby.prod

* update code_check list

* remove extra spaces

* fix errors

* ruff formatting
---
 ci/code_checks.sh              |  2 -
 pandas/core/groupby/groupby.py | 77 ++++++++++++++++------------------
 2 files changed, 37 insertions(+), 42 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 4ddc429f2a51c..76cc02652ec24 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -226,7 +226,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-        -i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
@@ -243,7 +242,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
         -i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-        -i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
         -i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b288dad63179f..8c9c92594ebe7 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -164,32 +164,6 @@ class providing the base-class of operations.
             to each row or column of a DataFrame.
 """
 
-_groupby_agg_method_template = """
-Compute {fname} of group values.
-
-Parameters
-----------
-numeric_only : bool, default {no}
-    Include only float, int, boolean columns.
-
-    .. versionchanged:: 2.0.0
-
-        numeric_only no longer accepts ``None``.
-
-min_count : int, default {mc}
-    The required number of valid values to perform the operation. If fewer
-    than ``min_count`` non-NA values are present the result will be NA.
-
-Returns
--------
-Series or DataFrame
-    Computed {fname} of values within each group.
-
-Examples
---------
-{example}
-"""
-
 _groupby_agg_method_engine_template = """
 Compute {fname} of group values.
 
@@ -3029,16 +3003,38 @@ def sum(
             return result
 
     @final
-    @doc(
-        _groupby_agg_method_template,
-        fname="prod",
-        no=False,
-        mc=0,
-        example=dedent(
-            """\
+    def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
+        """
+        Compute prod of group values.
+
+        Parameters
+        ----------
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+
+            .. versionchanged:: 2.0.0
+
+                numeric_only no longer accepts ``None``.
+
+        min_count : int, default 0
+            The required number of valid values to perform the operation. If fewer
+            than ``min_count`` non-NA values are present the result will be NA.
+
+        Returns
+        -------
+        Series or DataFrame
+            Computed prod of values within each group.
+
+        See Also
+        --------
+        Series.prod : Return the product of the values over the requested axis.
+        DataFrame.prod : Return the product of the values over the requested axis.
+
+        Examples
+        --------
         For SeriesGroupBy:
 
-        >>> lst = ['a', 'a', 'b', 'b']
+        >>> lst = ["a", "a", "b", "b"]
         >>> ser = pd.Series([1, 2, 3, 4], index=lst)
         >>> ser
         a    1
@@ -3054,8 +3050,11 @@ def sum(
         For DataFrameGroupBy:
 
         >>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
-        >>> df = pd.DataFrame(data, columns=["a", "b", "c"],
-        ...                   index=["tiger", "leopard", "cheetah", "lion"])
+        >>> df = pd.DataFrame(
+        ...     data,
+        ...     columns=["a", "b", "c"],
+        ...     index=["tiger", "leopard", "cheetah", "lion"],
+        ... )
         >>> df
                   a  b  c
           tiger   1  8  2
@@ -3066,10 +3065,8 @@ def sum(
              b    c
         a
         1   16   10
-        2   30   72"""
-        ),
-    )
-    def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
+        2   30   72
+        """
         return self._agg_general(
             numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
         )

From bb4ab4f2c0c2806f367679b7131fb98f718a3480 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli 
Date: Mon, 26 Aug 2024 20:36:12 +0200
Subject: [PATCH 011/176] ENH: support Arrow PyCapsule Interface on Series for
 export (#59587)

* ENH: support Arrow PyCapsule Interface on Series for export

* simplify

* simplify
---
 doc/source/whatsnew/v3.0.0.rst              |  1 +
 pandas/core/series.py                       | 27 +++++++++++++++++++++
 pandas/tests/series/test_arrow_interface.py | 23 ++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 pandas/tests/series/test_arrow_interface.py

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 1533f9267ce39..eaf9ce899f03a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -43,6 +43,7 @@ Other enhancements
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
+- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
 - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
 - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 17494f948876a..4f79e30f48f3c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -34,6 +34,7 @@
 from pandas._libs.lib import is_range_indexer
 from pandas.compat import PYPY
 from pandas.compat._constants import REF_COUNT
+from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.errors import (
     ChainedAssignmentError,
@@ -558,6 +559,32 @@ def _init_dict(
 
     # ----------------------------------------------------------------------
 
+    def __arrow_c_stream__(self, requested_schema=None):
+        """
+        Export the pandas Series as an Arrow C stream PyCapsule.
+
+        This relies on pyarrow to convert the pandas Series to the Arrow
+        format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
+        in its handling of the index, i.e. to ignore it).
+        This conversion is not necessarily zero-copy.
+
+        Parameters
+        ----------
+        requested_schema : PyCapsule, default None
+            The schema to which the dataframe should be casted, passed as a
+            PyCapsule containing a C ArrowSchema representation of the
+            requested schema.
+
+        Returns
+        -------
+        PyCapsule
+        """
+        pa = import_optional_dependency("pyarrow", min_version="16.0.0")
+        ca = pa.chunked_array([pa.Array.from_pandas(self, type=requested_schema)])
+        return ca.__arrow_c_stream__(requested_schema)
+
+    # ----------------------------------------------------------------------
+
     @property
     def _constructor(self) -> type[Series]:
         return Series
diff --git a/pandas/tests/series/test_arrow_interface.py b/pandas/tests/series/test_arrow_interface.py
new file mode 100644
index 0000000000000..34a2a638e4185
--- /dev/null
+++ b/pandas/tests/series/test_arrow_interface.py
@@ -0,0 +1,23 @@
+import ctypes
+
+import pytest
+
+import pandas as pd
+
+pa = pytest.importorskip("pyarrow", minversion="16.0")
+
+
+def test_series_arrow_interface():
+    s = pd.Series([1, 4, 2])
+
+    capsule = s.__arrow_c_stream__()
+    assert (
+        ctypes.pythonapi.PyCapsule_IsValid(
+            ctypes.py_object(capsule), b"arrow_array_stream"
+        )
+        == 1
+    )
+
+    ca = pa.chunked_array(s)
+    expected = pa.chunked_array([[1, 4, 2]])
+    assert ca.equals(expected)

From 15e9e7acca996660b2e53c3421702b4f41e81fd6 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 26 Aug 2024 10:55:48 -1000
Subject: [PATCH 012/176] REF: Minimize operations in recode_for_groupby
 (#59618)

---
 pandas/core/groupby/categorical.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
index 49130d91a0126..90cd8e3ffa1c7 100644
--- a/pandas/core/groupby/categorical.py
+++ b/pandas/core/groupby/categorical.py
@@ -46,9 +46,8 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
         # In cases with c.ordered, this is equivalent to
         #  return c.remove_unused_categories(), c
 
-        unique_codes = unique1d(c.codes)  # type: ignore[no-untyped-call]
+        take_codes = unique1d(c.codes[c.codes != -1])  # type: ignore[no-untyped-call]
 
-        take_codes = unique_codes[unique_codes != -1]
         if sort:
             take_codes = np.sort(take_codes)
 
@@ -67,17 +66,18 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
 
     # sort=False should order groups in as-encountered order (GH-8868)
 
-    # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
-    all_codes = np.arange(c.categories.nunique())
+    # GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
     # GH 38140: exclude nan from indexer for categories
     unique_notnan_codes = unique1d(c.codes[c.codes != -1])  # type: ignore[no-untyped-call]
     if sort:
         unique_notnan_codes = np.sort(unique_notnan_codes)
-    if len(all_codes) > len(unique_notnan_codes):
+    if (num_cat := len(c.categories)) > len(unique_notnan_codes):
         # GH 13179: All categories need to be present, even if missing from the data
-        missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
+        missing_codes = np.setdiff1d(
+            np.arange(num_cat), unique_notnan_codes, assume_unique=True
+        )
         take_codes = np.concatenate((unique_notnan_codes, missing_codes))
     else:
         take_codes = unique_notnan_codes
 
-    return Categorical(c, c.unique().categories.take(take_codes))
+    return Categorical(c, c.categories.take(take_codes))

From 8f7080b10e2fbcdae1c230c8e659c75f2b76ae18 Mon Sep 17 00:00:00 2001
From: matiaslindgren 
Date: Mon, 26 Aug 2024 23:58:32 +0200
Subject: [PATCH 013/176] BUG: allow None as name in multi-index during join
 (#59546)

* allow None as name in multi-index

* update whatsnew

* add unit test for none label joins

* move bugfix note under Reshaping

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst          |  1 +
 pandas/core/indexes/base.py             |  4 ++--
 pandas/tests/reshape/merge/test_join.py | 26 +++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index eaf9ce899f03a..338fbc744510c 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -659,6 +659,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
+- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d39c337fbb4b2..c8dbea1fd39ea 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4516,8 +4516,8 @@ def _join_multi(self, other: Index, how: JoinHow):
         from pandas.core.reshape.merge import restore_dropped_levels_multijoin
 
         # figure out join names
-        self_names_list = list(com.not_none(*self.names))
-        other_names_list = list(com.not_none(*other.names))
+        self_names_list = list(self.names)
+        other_names_list = list(other.names)
         self_names_order = self_names_list.index
         other_names_order = other_names_list.index
         self_names = set(self_names_list)
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index f090ded06119a..0f743332acbbe 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -1098,3 +1098,29 @@ def test_join_multiindex_categorical_output_index_dtype(how, values):
 
     result = df1.join(df2, how=how)
     tm.assert_frame_equal(result, expected)
+
+
+def test_join_multiindex_with_none_as_label():
+    # GH 58721
+    df1 = DataFrame(
+        {"A": [1]},
+        index=MultiIndex.from_tuples([(3, 3)], names=["X", None]),
+    )
+    df2 = DataFrame(
+        {"B": [2]},
+        index=MultiIndex.from_tuples([(3, 3)], names=[None, "X"]),
+    )
+
+    result12 = df1.join(df2)
+    expected12 = DataFrame(
+        {"A": [1], "B": [2]},
+        index=MultiIndex.from_tuples([(3, 3)], names=["X", None]),
+    )
+    tm.assert_frame_equal(result12, expected12)
+
+    result21 = df2.join(df1)
+    expected21 = DataFrame(
+        {"B": [2], "A": [1]},
+        index=MultiIndex.from_tuples([(3, 3)], names=[None, "X"]),
+    )
+    tm.assert_frame_equal(result21, expected21)

From bd81fef7edfe835871ee6ddaead759f5a0d1affb Mon Sep 17 00:00:00 2001
From: Kevin Amparado <109636487+KevsterAmp@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:07:24 +0800
Subject: [PATCH 014/176] PERF: Performance Improvement on `DataFrame.to_csv()`
 when `index=False` (#59608)

* add alternative ix when self.nlevel is 0

* add to latest whatsnew

* change np.full to np.empty
---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 pandas/io/formats/csvs.py      | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 338fbc744510c..85a1d1ad566b4 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -505,6 +505,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
 - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
 - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
+- Performance improvement in :meth:`DataFrame.to_csv` when ``index=False`` (:issue:`59312`)
 - Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
 - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 50503e862ef43..75bcb51ef4be2 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -320,7 +320,11 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
         res = df._get_values_for_csv(**self._number_format)
         data = list(res._iter_column_arrays())
 
-        ix = self.data_index[slicer]._get_values_for_csv(**self._number_format)
+        ix = (
+            self.data_index[slicer]._get_values_for_csv(**self._number_format)
+            if self.nlevels != 0
+            else np.empty(end_i - start_i)
+        )
         libwriters.write_csv_rows(
             data,
             ix,

From 7c365796f866f7ead3fdea4ed1bf8083b096164f Mon Sep 17 00:00:00 2001
From: Harsha Lakamsani 
Date: Mon, 26 Aug 2024 17:09:21 -0700
Subject: [PATCH 015/176] DOC: fix docstring validation errors for
 pandas.io.formats.style.Styler (#59607)

* DOC: all pandas.io.formats.style.Styler docstring validation errors fixed

* DOCS: base to_excel docstring template extended for pandas.io.formats.style.Styler.to_excel
---
 ci/code_checks.sh          |  28 ----------
 pandas/core/generic.py     |   6 ++-
 pandas/io/formats/style.py | 106 +++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 29 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 76cc02652ec24..25317a08ca7b0 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -294,34 +294,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.errors.UnsupportedFunctionCall SA01" \
         -i "pandas.errors.ValueLabelTypeMismatch SA01" \
         -i "pandas.infer_freq SA01" \
-        -i "pandas.io.formats.style.Styler.apply RT03" \
-        -i "pandas.io.formats.style.Styler.apply_index RT03" \
-        -i "pandas.io.formats.style.Styler.background_gradient RT03" \
-        -i "pandas.io.formats.style.Styler.bar RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.clear SA01" \
-        -i "pandas.io.formats.style.Styler.concat RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.export RT03" \
-        -i "pandas.io.formats.style.Styler.from_custom_template SA01" \
-        -i "pandas.io.formats.style.Styler.hide RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.highlight_between RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_max RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_min RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_null RT03" \
-        -i "pandas.io.formats.style.Styler.highlight_quantile RT03" \
-        -i "pandas.io.formats.style.Styler.map RT03" \
-        -i "pandas.io.formats.style.Styler.map_index RT03" \
-        -i "pandas.io.formats.style.Styler.set_caption RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_properties RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_sticky RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_table_attributes PR07,RT03" \
-        -i "pandas.io.formats.style.Styler.set_table_styles RT03" \
-        -i "pandas.io.formats.style.Styler.set_td_classes RT03" \
-        -i "pandas.io.formats.style.Styler.set_tooltips RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.set_uuid PR07,RT03,SA01" \
-        -i "pandas.io.formats.style.Styler.text_gradient RT03" \
-        -i "pandas.io.formats.style.Styler.to_excel PR01" \
-        -i "pandas.io.formats.style.Styler.to_string SA01" \
-        -i "pandas.io.formats.style.Styler.use RT03" \
         -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
         -i "pandas.io.stata.StataReader.data_label SA01" \
         -i "pandas.io.stata.StataReader.value_labels RT03,SA01" \
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 61fa5c49a8c5b..eae3249aa79a4 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2123,11 +2123,13 @@ def _repr_data_resource_(self):
         klass="object",
         storage_options=_shared_docs["storage_options"],
         storage_options_versionadded="1.2.0",
+        encoding_parameter="",
+        verbose_parameter="",
         extra_parameters=textwrap.dedent(
             """\
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
-    """
+        """
         ),
     )
     def to_excel(
@@ -2196,9 +2198,11 @@ def to_excel(
 
         merge_cells : bool, default True
             Write MultiIndex and Hierarchical Rows as merged cells.
+        {encoding_parameter}
         inf_rep : str, default 'inf'
             Representation for infinity (there is no native representation for
             infinity in Excel).
+        {verbose_parameter}
         freeze_panes : tuple of int (length 2), optional
             Specifies the one-based bottommost row and rightmost column that
             is to be frozen.
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 6f4c2fa6c6eae..82bc0301fed3a 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -7,6 +7,7 @@
 import copy
 from functools import partial
 import operator
+import textwrap
 from typing import (
     TYPE_CHECKING,
     overload,
@@ -306,6 +307,12 @@ def concat(self, other: Styler) -> Styler:
         Returns
         -------
         Styler
+            Instance of class with specified Styler appended.
+
+        See Also
+        --------
+        Styler.clear : Reset the ``Styler``, removing any previously applied styles.
+        Styler.export : Export the styles applied to the current Styler.
 
         Notes
         -----
@@ -447,6 +454,15 @@ def set_tooltips(
         Returns
         -------
         Styler
+            Instance of class with DataFrame set for strings on ``Styler``
+                generating ``:hover`` tooltips.
+
+        See Also
+        --------
+        Styler.set_table_attributes : Set the table attributes added to the
+            ``<table>`` HTML element.
+        Styler.set_table_styles : Set the table styles included within the
+            ``