Fix a few issues found in my team's code (#782)

* working with 2.1.1
* fixes for nightly
* fix invalid usage for mypy. use different resample by month keys based on version
* fix pyproject 3.13 dependency. test df.apply via pytest_warns_bounded. change BusinessDay
* fix 767, 743, 641
* use IntStrT with read_excel
pandas-dev · Sep 25, 2023 · 05244cb · 05244cb
1 parent a912c8e
commit 05244cb
Show file tree

Hide file tree

Showing 7 changed files with 146 additions and 15 deletions.
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -47,6 +47,7 @@ AnyArrayLike: TypeAlias = Index | Series | np.ndarray
 PythonScalar: TypeAlias = str | bool | complex
 DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", Period, Timestamp, Timedelta)
 PandasScalar: TypeAlias = bytes | datetime.date | datetime.datetime | datetime.timedelta
+IntStrT = TypeVar("IntStrT", int, str)  # constrained TypeVar: solves to int or to str, not to int | str
 # Scalar: TypeAlias = PythonScalar | PandasScalar
 
 DatetimeLike: TypeAlias = datetime.datetime | np.datetime64 | Timestamp

diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -56,6 +56,7 @@ import xarray as xr
 
 from pandas._libs.missing import NAType
 from pandas._libs.tslibs import BaseOffset
+from pandas._libs.tslibs.nattype import NaTType
 from pandas._typing import (
     S1,
     AggFuncTypeBase,
@@ -152,7 +153,7 @@ class _iLocIndexerFrame(_iLocIndexer):
         | tuple[IndexType, int]
         | tuple[IndexType, IndexType]
         | tuple[int, IndexType],
-        value: Scalar | Series | DataFrame | np.ndarray | None,
+        value: Scalar | Series | DataFrame | np.ndarray | NAType | NaTType | None,
     ) -> None: ...
 
 class _LocIndexerFrame(_LocIndexer):
@@ -201,13 +202,13 @@ class _LocIndexerFrame(_LocIndexer):
     def __setitem__(
         self,
         idx: MaskType | StrLike | _IndexSliceTuple | list[ScalarT],
-        value: Scalar | ArrayLike | Series | DataFrame | list | None,
+        value: Scalar | NAType | NaTType | ArrayLike | Series | DataFrame | list | None,
     ) -> None: ...
     @overload
     def __setitem__(
         self,
         idx: tuple[_IndexSliceTuple, HashableT],
-        value: Scalar | ArrayLike | Series | list | None,
+        value: Scalar | NAType | NaTType | ArrayLike | Series | list | None,
     ) -> None: ...
 
 class DataFrame(NDFrame, OpsMixin):

diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -134,9 +134,11 @@ from pandas._typing import (
     Renamer,
     ReplaceMethod,
     Scalar,
+    ScalarT,
     SeriesByT,
     SortKind,
     StrDtypeArg,
+    StrLike,
     TimedeltaDtypeArg,
     TimestampConvention,
     TimestampDtypeArg,
@@ -167,7 +169,9 @@ class _iLocIndexerSeries(_iLocIndexer, Generic[S1]):
     def __setitem__(self, idx: int, value: S1 | None) -> None: ...
     @overload
     def __setitem__(
-        self, idx: Index | slice | np_ndarray_anyint, value: S1 | Series[S1] | None
+        self,
+        idx: Index | slice | np_ndarray_anyint | list[int],
+        value: S1 | Series[S1] | None,
     ) -> None: ...
 
 class _LocIndexerSeries(_LocIndexer, Generic[S1]):
@@ -207,7 +211,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]):
     @overload
     def __setitem__(
         self,
-        idx: list[int] | list[str] | list[str | int],
+        idx: MaskType | StrLike | _IndexSliceTuple | list[ScalarT],
         value: S1 | ArrayLike | Series[S1] | None,
     ) -> None: ...
 

diff --git a/pandas-stubs/io/excel/_base.pyi b/pandas-stubs/io/excel/_base.pyi
@@ -24,6 +24,7 @@ from pandas._typing import (
     Dtype,
     DtypeBackend,
     FilePath,
+    IntStrT,
     ListLikeHashable,
     ReadBuffer,
     StorageOptions,
@@ -41,7 +42,86 @@ def read_excel(
     | Book
     | OpenDocument
     | pyxlsb.workbook.Workbook,
-    sheet_name: list[int | str] | None,
+    sheet_name: list[IntStrT],
+    *,
+    header: int | Sequence[int] | None = ...,
+    names: ListLikeHashable | None = ...,
+    index_col: int | Sequence[int] | None = ...,
+    usecols: str | UsecolsArgType = ...,
+    dtype: str | Dtype | Mapping[str, str | Dtype] | None = ...,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
+    converters: Mapping[int | str, Callable[[object], object]] | None = ...,
+    true_values: Iterable[Hashable] | None = ...,
+    false_values: Iterable[Hashable] | None = ...,
+    skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
+    nrows: int | None = ...,
+    na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
+    keep_default_na: bool = ...,
+    na_filter: bool = ...,
+    verbose: bool = ...,
+    parse_dates: bool
+    | Sequence[int]
+    | Sequence[Sequence[str] | Sequence[int]]
+    | dict[str, Sequence[int] | list[str]] = ...,
+    date_format: dict[Hashable, str] | str | None = ...,
+    thousands: str | None = ...,
+    decimal: str = ...,
+    comment: str | None = ...,
+    skipfooter: int = ...,
+    storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
+) -> dict[IntStrT, DataFrame]: ...
+@overload
+def read_excel(
+    io: FilePath
+    | ReadBuffer[bytes]
+    | bytes
+    | ExcelFile
+    | Workbook
+    | Book
+    | OpenDocument
+    | pyxlsb.workbook.Workbook,
+    sheet_name: None,  # this overload is selected only when sheet_name is None
+    *,
+    header: int | Sequence[int] | None = ...,
+    names: ListLikeHashable | None = ...,
+    index_col: int | Sequence[int] | None = ...,
+    usecols: str | UsecolsArgType = ...,
+    dtype: str | Dtype | Mapping[str, str | Dtype] | None = ...,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
+    converters: Mapping[int | str, Callable[[object], object]] | None = ...,
+    true_values: Iterable[Hashable] | None = ...,
+    false_values: Iterable[Hashable] | None = ...,
+    skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
+    nrows: int | None = ...,
+    na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
+    keep_default_na: bool = ...,
+    na_filter: bool = ...,
+    verbose: bool = ...,
+    parse_dates: bool
+    | Sequence[int]
+    | Sequence[Sequence[str] | Sequence[int]]
+    | dict[str, Sequence[int] | list[str]] = ...,
+    date_format: dict[Hashable, str] | str | None = ...,
+    thousands: str | None = ...,
+    decimal: str = ...,
+    comment: str | None = ...,
+    skipfooter: int = ...,
+    storage_options: StorageOptions = ...,
+    dtype_backend: DtypeBackend | NoDefault = ...,
+) -> dict[str, DataFrame]: ...  # keys are typed as str for the sheet_name=None case
+@overload
+# mypy says this won't be matched
+def read_excel(  # type: ignore[misc]
+    io: FilePath
+    | ReadBuffer[bytes]
+    | bytes
+    | ExcelFile
+    | Workbook
+    | Book
+    | OpenDocument
+    | pyxlsb.workbook.Workbook,
+    sheet_name: list[int | str],
     *,
     header: int | Sequence[int] | None = ...,
     names: ListLikeHashable | None = ...,

diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -2849,3 +2849,20 @@ def test_getitem_dict_keys() -> None:
     some_columns = {"a": [1], "b": [2]}
     df = pd.DataFrame.from_dict(some_columns)
     check(assert_type(df[some_columns.keys()], pd.DataFrame), pd.DataFrame)
+
+
+def test_frame_setitem_na() -> None:
+    # GH 743: pd.NA / pd.NaT assigned via DataFrame.loc and .iloc; the assignments are the whole test
+    df = pd.DataFrame(
+        {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")},
+        index=pd.Index(["a", "b", "c"]),
+    ).convert_dtypes()
+
+    ind = pd.Index(["a", "c"])  # row labels used with .loc below
+
+    df.loc[ind, :] = pd.NA
+    df.iloc[[0, 2], :] = pd.NA  # positions 0 and 2 are the rows labelled "a" and "c"
+
+    df["x"] = df["y"] + pd.Timedelta(days=3)  # overwrite integer column "x" with datetimes before assigning NaT
+    df.loc[ind, :] = pd.NaT
+    df.iloc[[0, 2], :] = pd.NaT
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -887,7 +887,7 @@ def test_read_excel() -> None:
         check(
             assert_type(
                 pd.read_excel(path, sheet_name=["Sheet1"]),
-                dict[Union[int, str], pd.DataFrame],
+                dict[str, pd.DataFrame],
             ),
             dict,
         )
@@ -896,9 +896,7 @@ def test_read_excel() -> None:
             assert_type(pd.read_excel(path, sheet_name=0), pd.DataFrame), pd.DataFrame
         )
         check(
-            assert_type(
-                pd.read_excel(path, sheet_name=[0]), dict[Union[int, str], pd.DataFrame]
-            ),
+            assert_type(pd.read_excel(path, sheet_name=[0]), dict[int, pd.DataFrame]),
             dict,
         )
         check(
@@ -908,10 +906,11 @@ def test_read_excel() -> None:
             ),
             dict,
         )
+        # GH 641
         check(
             assert_type(
                 pd.read_excel(path, sheet_name=None),
-                dict[Union[int, str], pd.DataFrame],
+                dict[str, pd.DataFrame],
             ),
             dict,
         )
@@ -1032,14 +1031,12 @@ def test_read_excel_list():
         check(
             assert_type(
                 read_excel(path, sheet_name=["Sheet1"]),
-                dict[Union[str, int], DataFrame],
+                dict[str, DataFrame],
             ),
             dict,
         )
         check(
-            assert_type(
-                read_excel(path, sheet_name=[0]), dict[Union[str, int], DataFrame]
-            ),
+            assert_type(read_excel(path, sheet_name=[0]), dict[int, DataFrame]),
             dict,
         )
 

diff --git a/tests/test_series.py b/tests/test_series.py
@@ -2799,3 +2799,34 @@ def test_rank() -> None:
     check(
         assert_type(pd.Series([1, 2]).rank(), "pd.Series[float]"), pd.Series, np.float64
     )
+
+
+def test_series_setitem_multiindex() -> None:
+    # GH 767: scalar assignment through Series.loc with a pd.IndexSlice key
+    df = (
+        pd.DataFrame({"x": [1, 2, 3, 4]})
+        .assign(y=lambda df: df["x"] * 10, z=lambda df: df["x"] * 100)
+        .set_index(["x", "y"])  # index built from columns "x" and "y"; "z" stays a column
+    )
+    ind = pd.Index([2, 3])  # used as the first component of the IndexSlice key below
+    s = df["z"]
+
+    s.loc[pd.IndexSlice[ind, :]] = 30
+
+
+def test_series_setitem_na() -> None:
+    # GH 743: pd.NA / pd.NaT assigned via Series.loc and Series.iloc; the assignments are the whole test
+    df = pd.DataFrame(
+        {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")},
+        index=pd.Index(["a", "b", "c"]),
+    ).convert_dtypes()
+
+    ind = pd.Index(["a", "c"])  # row labels used with .loc below
+    s = df["x"].copy()  # column built from the integers [1, 2, 3]; receives pd.NA
+
+    s.loc[ind] = pd.NA
+    s.iloc[[0, 2]] = pd.NA  # positions 0 and 2 are the rows labelled "a" and "c"
+
+    s2 = df["y"].copy()  # column built from pd.date_range; receives pd.NaT
+    s2.loc[ind] = pd.NaT
+    s2.iloc[[0, 2]] = pd.NaT