Skip to content

Commit

Permalink
Fix a few issues found in my team's code (#782)
Browse files Browse the repository at this point in the history
* working with 2.1.1

* fixes for nightly

* fix invalid usage for mypy. use different resample by month keys based on version

* fix pyproject 3.13 dependency. test df.apply via pytest_warns_bounded. change BusinessDay

* fix GH 767, 743, 641

* use IntStrT with read_excel
  • Loading branch information
Dr-Irv authored Sep 25, 2023
1 parent a912c8e commit 05244cb
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 15 deletions.
1 change: 1 addition & 0 deletions pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ AnyArrayLike: TypeAlias = Index | Series | np.ndarray
# Union of the built-in str, bool and complex types.
PythonScalar: TypeAlias = str | bool | complex
# Constrained TypeVar: binds to exactly one of Period, Timestamp or Timedelta.
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", Period, Timestamp, Timedelta)
# Union of bytes and the standard-library date, datetime and timedelta types.
PandasScalar: TypeAlias = bytes | datetime.date | datetime.datetime | datetime.timedelta
# Constrained TypeVar: binds to exactly int or exactly str (never their union),
# so a signature that takes list[IntStrT] and returns a container keyed by
# IntStrT keeps the caller's key type.
IntStrT = TypeVar("IntStrT", int, str)
# Scalar: TypeAlias = PythonScalar | PandasScalar

# Union of the datetime-like point-in-time types: stdlib, NumPy and pandas.
DatetimeLike: TypeAlias = datetime.datetime | np.datetime64 | Timestamp
Expand Down
7 changes: 4 additions & 3 deletions pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import xarray as xr

from pandas._libs.missing import NAType
from pandas._libs.tslibs import BaseOffset
from pandas._libs.tslibs.nattype import NaTType
from pandas._typing import (
S1,
AggFuncTypeBase,
Expand Down Expand Up @@ -152,7 +153,7 @@ class _iLocIndexerFrame(_iLocIndexer):
| tuple[IndexType, int]
| tuple[IndexType, IndexType]
| tuple[int, IndexType],
value: Scalar | Series | DataFrame | np.ndarray | None,
value: Scalar | Series | DataFrame | np.ndarray | NAType | NaTType | None,
) -> None: ...

class _LocIndexerFrame(_LocIndexer):
Expand Down Expand Up @@ -201,13 +202,13 @@ class _LocIndexerFrame(_LocIndexer):
def __setitem__(
self,
idx: MaskType | StrLike | _IndexSliceTuple | list[ScalarT],
value: Scalar | ArrayLike | Series | DataFrame | list | None,
value: Scalar | NAType | NaTType | ArrayLike | Series | DataFrame | list | None,
) -> None: ...
@overload
def __setitem__(
self,
idx: tuple[_IndexSliceTuple, HashableT],
value: Scalar | ArrayLike | Series | list | None,
value: Scalar | NAType | NaTType | ArrayLike | Series | list | None,
) -> None: ...

class DataFrame(NDFrame, OpsMixin):
Expand Down
8 changes: 6 additions & 2 deletions pandas-stubs/core/series.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,11 @@ from pandas._typing import (
Renamer,
ReplaceMethod,
Scalar,
ScalarT,
SeriesByT,
SortKind,
StrDtypeArg,
StrLike,
TimedeltaDtypeArg,
TimestampConvention,
TimestampDtypeArg,
Expand Down Expand Up @@ -167,7 +169,9 @@ class _iLocIndexerSeries(_iLocIndexer, Generic[S1]):
def __setitem__(self, idx: int, value: S1 | None) -> None: ...
@overload
def __setitem__(
self, idx: Index | slice | np_ndarray_anyint, value: S1 | Series[S1] | None
self,
idx: Index | slice | np_ndarray_anyint | list[int],
value: S1 | Series[S1] | None,
) -> None: ...

class _LocIndexerSeries(_LocIndexer, Generic[S1]):
Expand Down Expand Up @@ -207,7 +211,7 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]):
@overload
def __setitem__(
self,
idx: list[int] | list[str] | list[str | int],
idx: MaskType | StrLike | _IndexSliceTuple | list[ScalarT],
value: S1 | ArrayLike | Series[S1] | None,
) -> None: ...

Expand Down
82 changes: 81 additions & 1 deletion pandas-stubs/io/excel/_base.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ from pandas._typing import (
Dtype,
DtypeBackend,
FilePath,
IntStrT,
ListLikeHashable,
ReadBuffer,
StorageOptions,
Expand All @@ -41,7 +42,86 @@ def read_excel(
| Book
| OpenDocument
| pyxlsb.workbook.Workbook,
sheet_name: list[int | str] | None,
sheet_name: list[IntStrT],
*,
header: int | Sequence[int] | None = ...,
names: ListLikeHashable | None = ...,
index_col: int | Sequence[int] | None = ...,
usecols: str | UsecolsArgType = ...,
dtype: str | Dtype | Mapping[str, str | Dtype] | None = ...,
engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
converters: Mapping[int | str, Callable[[object], object]] | None = ...,
true_values: Iterable[Hashable] | None = ...,
false_values: Iterable[Hashable] | None = ...,
skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
nrows: int | None = ...,
na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
keep_default_na: bool = ...,
na_filter: bool = ...,
verbose: bool = ...,
parse_dates: bool
| Sequence[int]
| Sequence[Sequence[str] | Sequence[int]]
| dict[str, Sequence[int] | list[str]] = ...,
date_format: dict[Hashable, str] | str | None = ...,
thousands: str | None = ...,
decimal: str = ...,
comment: str | None = ...,
skipfooter: int = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
) -> dict[IntStrT, DataFrame]: ...
@overload
# Overload selected when sheet_name is None: the result is typed as a dict
# with str keys and DataFrame values.
def read_excel(
io: FilePath
| ReadBuffer[bytes]
| bytes
| ExcelFile
| Workbook
| Book
| OpenDocument
| pyxlsb.workbook.Workbook,
# The literal None type is what distinguishes this overload from the others.
sheet_name: None,
# Every parameter below this marker is keyword-only.
*,
header: int | Sequence[int] | None = ...,
names: ListLikeHashable | None = ...,
index_col: int | Sequence[int] | None = ...,
usecols: str | UsecolsArgType = ...,
dtype: str | Dtype | Mapping[str, str | Dtype] | None = ...,
engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
converters: Mapping[int | str, Callable[[object], object]] | None = ...,
true_values: Iterable[Hashable] | None = ...,
false_values: Iterable[Hashable] | None = ...,
skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
nrows: int | None = ...,
na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
keep_default_na: bool = ...,
na_filter: bool = ...,
verbose: bool = ...,
parse_dates: bool
| Sequence[int]
| Sequence[Sequence[str] | Sequence[int]]
| dict[str, Sequence[int] | list[str]] = ...,
date_format: dict[Hashable, str] | str | None = ...,
thousands: str | None = ...,
decimal: str = ...,
comment: str | None = ...,
skipfooter: int = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
) -> dict[str, DataFrame]: ...
@overload
# mypy says this won't be matched
def read_excel( # type: ignore[misc]
io: FilePath
| ReadBuffer[bytes]
| bytes
| ExcelFile
| Workbook
| Book
| OpenDocument
| pyxlsb.workbook.Workbook,
sheet_name: list[int | str],
*,
header: int | Sequence[int] | None = ...,
names: ListLikeHashable | None = ...,
Expand Down
17 changes: 17 additions & 0 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2849,3 +2849,20 @@ def test_getitem_dict_keys() -> None:
some_columns = {"a": [1], "b": [2]}
df = pd.DataFrame.from_dict(some_columns)
check(assert_type(df[some_columns.keys()], pd.DataFrame), pd.DataFrame)


def test_frame_setitem_na() -> None:
    """Assign ``pd.NA`` and ``pd.NaT`` through ``DataFrame.loc`` / ``.iloc``.

    Regression test for GH 743: each assignment below uses either an
    ``Index`` of labels (``.loc``) or a list of positions (``.iloc``) as the
    row selector, together with a full column slice.
    """
    # GH 743
    df = pd.DataFrame(
        {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")},
        index=pd.Index(["a", "b", "c"]),
    ).convert_dtypes()

    ind = pd.Index(["a", "c"])

    df.loc[ind, :] = pd.NA
    df.iloc[[0, 2], :] = pd.NA

    # Replace "x" with datetime values derived from "y" so that both columns
    # are datetime-like before NaT is assigned across whole rows.
    df["x"] = df["y"] + pd.Timedelta(days=3)
    df.loc[ind, :] = pd.NaT
    df.iloc[[0, 2], :] = pd.NaT
15 changes: 6 additions & 9 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,7 @@ def test_read_excel() -> None:
check(
assert_type(
pd.read_excel(path, sheet_name=["Sheet1"]),
dict[Union[int, str], pd.DataFrame],
dict[str, pd.DataFrame],
),
dict,
)
Expand All @@ -896,9 +896,7 @@ def test_read_excel() -> None:
assert_type(pd.read_excel(path, sheet_name=0), pd.DataFrame), pd.DataFrame
)
check(
assert_type(
pd.read_excel(path, sheet_name=[0]), dict[Union[int, str], pd.DataFrame]
),
assert_type(pd.read_excel(path, sheet_name=[0]), dict[int, pd.DataFrame]),
dict,
)
check(
Expand All @@ -908,10 +906,11 @@ def test_read_excel() -> None:
),
dict,
)
# GH 641
check(
assert_type(
pd.read_excel(path, sheet_name=None),
dict[Union[int, str], pd.DataFrame],
dict[str, pd.DataFrame],
),
dict,
)
Expand Down Expand Up @@ -1032,14 +1031,12 @@ def test_read_excel_list():
check(
assert_type(
read_excel(path, sheet_name=["Sheet1"]),
dict[Union[str, int], DataFrame],
dict[str, DataFrame],
),
dict,
)
check(
assert_type(
read_excel(path, sheet_name=[0]), dict[Union[str, int], DataFrame]
),
assert_type(read_excel(path, sheet_name=[0]), dict[int, DataFrame]),
dict,
)

Expand Down
31 changes: 31 additions & 0 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2799,3 +2799,34 @@ def test_rank() -> None:
check(
assert_type(pd.Series([1, 2]).rank(), "pd.Series[float]"), pd.Series, np.float64
)


def test_series_setitem_multiindex() -> None:
    """Assign through ``Series.loc`` using a ``pd.IndexSlice`` key.

    Regression test for GH 767: the Series is a column of a frame indexed by
    the two levels ``("x", "y")``, and the key selects first-level labels via
    an ``Index`` plus a full slice on the second level.
    """
    # GH 767
    df = (
        pd.DataFrame({"x": [1, 2, 3, 4]})
        .assign(y=lambda df: df["x"] * 10, z=lambda df: df["x"] * 100)
        .set_index(["x", "y"])
    )
    ind = pd.Index([2, 3])
    s = df["z"]

    s.loc[pd.IndexSlice[ind, :]] = 30


def test_series_setitem_na() -> None:
    """Assign ``pd.NA`` and ``pd.NaT`` through ``Series.loc`` / ``.iloc``.

    Regression test for GH 743: ``.loc`` is keyed by an ``Index`` of labels
    and ``.iloc`` by a list of integer positions.
    """
    # GH 743
    df = pd.DataFrame(
        {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")},
        index=pd.Index(["a", "b", "c"]),
    ).convert_dtypes()

    ind = pd.Index(["a", "c"])
    # Work on copies so the assignments do not write back into ``df``.
    s = df["x"].copy()

    s.loc[ind] = pd.NA
    s.iloc[[0, 2]] = pd.NA

    s2 = df["y"].copy()
    s2.loc[ind] = pd.NaT
    s2.iloc[[0, 2]] = pd.NaT

0 comments on commit 05244cb

Please sign in to comment.