Skip to content

Commit

Permalink
GH1055 Add pandas.api.typing to pandas-stubs (#1058)
Browse files Browse the repository at this point in the history
* GH1055 Add pandas.api.typing to pandas-stubs

* GH1055 Fix lint

* GH1055 Fix assert_type

* GH1055 Fix tests

* Revert to generic

* GH1055 Fix tests

* GH1055 new test format

* GH1055 PR Feedback

* GH1055 TypeAlias Feedback
  • Loading branch information
loicdiridollou authored Dec 1, 2024
1 parent ddccc56 commit d4d399e
Show file tree
Hide file tree
Showing 2 changed files with 247 additions and 0 deletions.
29 changes: 29 additions & 0 deletions pandas-stubs/api/typing/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pandas.core.groupby import (
DataFrameGroupBy as DataFrameGroupBy,
SeriesGroupBy as SeriesGroupBy,
)
from pandas.core.resample import (
DatetimeIndexResamplerGroupby as DatetimeIndexResamplerGroupby,
PeriodIndexResamplerGroupby as PeriodIndexResamplerGroupby,
Resampler as Resampler,
TimedeltaIndexResamplerGroupby as TimedeltaIndexResamplerGroupby,
TimeGrouper as TimeGrouper,
)
from pandas.core.window import (
Expanding as Expanding,
ExpandingGroupby as ExpandingGroupby,
ExponentialMovingWindow as ExponentialMovingWindow,
ExponentialMovingWindowGroupby as ExponentialMovingWindowGroupby,
Rolling as Rolling,
RollingGroupby as RollingGroupby,
Window as Window,
)

from pandas._libs import NaTType as NaTType
from pandas._libs.missing import NAType as NAType

from pandas.io.json._json import JsonReader as JsonReader

# SASReader is not defined, so its re-export is commented out for now.
# from pandas.io.sas.sasreader import SASReader as SASReader
from pandas.io.stata import StataReader as StataReader
218 changes: 218 additions & 0 deletions tests/test_api_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
"""Test module for classes in pandas.api.typing."""

import numpy as np
import pandas as pd
from pandas._testing import ensure_clean
from pandas.api.typing import (
DataFrameGroupBy,
DatetimeIndexResamplerGroupby,
Expanding,
ExpandingGroupby,
ExponentialMovingWindow,
ExponentialMovingWindowGroupby,
JsonReader,
NaTType,
NAType,
PeriodIndexResamplerGroupby,
Resampler,
Rolling,
RollingGroupby,
SeriesGroupBy,
StataReader,
TimedeltaIndexResamplerGroupby,
TimeGrouper,
Window,
)
import pytest
from typing_extensions import (
TypeAlias,
assert_type,
)

from tests import check

from pandas.io.json._json import read_json

# Union of the three index-specific resampler-groupby classes. The
# resampler-groupby tests in this module annotate their helper's parameter with
# this alias and then check the concrete class of the object they pass in.
ResamplerGroupBy: TypeAlias = (
    DatetimeIndexResamplerGroupby
    | PeriodIndexResamplerGroupby
    | TimedeltaIndexResamplerGroupby
)


def test_dataframegroupby() -> None:
    """Pass a ``DataFrame.groupby`` result to a ``DataFrameGroupBy``-typed helper.

    The helper forwards the object to ``check`` together with
    ``DataFrameGroupBy``. The explicit ``-> None`` annotations match the other
    tests in this module and make the function typed, so type checkers that
    skip unannotated functions still analyse its body.
    """
    df = pd.DataFrame({"a": [1, 2, 3]})
    group = df.groupby("a")

    def f1(gb: DataFrameGroupBy) -> None:
        check(gb, DataFrameGroupBy)

    f1(group)


def test_seriesgroupby() -> None:
    """Pass a ``Series.groupby`` result to a ``SeriesGroupBy``-typed helper.

    The series is grouped by its first index level and the result is forwarded
    to ``check`` together with ``SeriesGroupBy``. The explicit ``-> None``
    annotations match the other tests in this module and make the function
    typed, so type checkers that skip unannotated functions still analyse its
    body.
    """
    sr = pd.Series([1, 2, 3], index=pd.Index(["a", "b", "a"]))

    def f1(gb: SeriesGroupBy) -> None:
        check(gb, SeriesGroupBy)

    f1(sr.groupby(level=0))


def tests_datetimeindexersamplergroupby() -> None:
    """Resample a groupby over a daily ``DatetimeIndex`` frame and check its class.

    The frame holds 365 rows of random normal data; it is grouped by ``col2``
    and resampled with the ``"ME"`` frequency before being checked against
    ``DatetimeIndexResamplerGroupby``.
    """
    dates = pd.date_range("1999-1-1", periods=365, freq="D")
    values = np.random.standard_normal((365, 2))
    frame = pd.DataFrame(values, index=dates, columns=["col1", "col2"])
    grouped = frame.groupby("col2")

    def verify(gb: ResamplerGroupBy):
        check(gb, DatetimeIndexResamplerGroupby)

    verify(grouped.resample("ME"))


def test_timedeltaindexresamplergroupby() -> None:
    """Resample a groupby over a ``TimedeltaIndex`` frame and check its class.

    A five-row frame of random normal data is grouped by ``col2`` and
    resampled with the ``"1D"`` frequency before being checked against
    ``TimedeltaIndexResamplerGroupby``.
    """
    deltas = pd.TimedeltaIndex(["0 days", "1 days", "2 days", "3 days", "4 days"])
    values = np.random.standard_normal((5, 2))
    frame = pd.DataFrame(values, index=deltas, columns=["col1", "col2"])
    grouped = frame.groupby("col2")

    def verify(gb: ResamplerGroupBy):
        check(gb, TimedeltaIndexResamplerGroupby)

    verify(grouped.resample("1D"))


@pytest.mark.skip("Resampling with a PeriodIndex is deprecated.")
def test_periodindexresamplergroupby() -> None:
    """Resample a groupby over a ``PeriodIndex`` frame and check its class.

    Skipped unconditionally via the decorator above; when run, it groups a
    four-row frame by column ``a``, resamples with ``"3min"`` and checks the
    result against ``PeriodIndexResamplerGroupby``.
    """
    periods = pd.period_range("2020-01-28 09:00", periods=4, freq="D")
    rows = 4 * [range(2)]
    frame = pd.DataFrame(data=rows, index=periods, columns=["a", "b"])

    def verify(gb: ResamplerGroupBy):
        check(gb, PeriodIndexResamplerGroupby)

    grouped = frame.groupby("a")
    verify(grouped.resample("3min"))


def test_natype() -> None:
    """Check that ``Int64Dtype().na_value`` is typed and checked as ``NAType``."""
    missing = pd.Int64Dtype().na_value
    check(assert_type(missing, NAType), NAType)


def test_nattype() -> None:
    """Check that adding ``pd.NaT`` to a ``Timedelta`` is typed as ``NaTType``."""
    one_day = pd.Timedelta("1 day")
    not_a_time = pd.NaT
    total = one_day + not_a_time

    check(assert_type(total, NaTType), NaTType)


def test_expanding() -> None:
    """Pass a ``DataFrame.expanding`` result to an ``Expanding``-typed helper."""
    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def verify(gb: Expanding):
        check(gb, Expanding)

    expanding_window = frame.expanding()
    verify(expanding_window)


def test_expanding_groubpy() -> None:
    """Pass a groupby ``expanding`` result to an ``ExpandingGroupby``-typed helper."""
    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def verify(gb: ExpandingGroupby):
        check(gb, ExpandingGroupby)

    grouped = frame.groupby("B")
    verify(grouped.expanding())


def test_ewm() -> None:
    """Pass a ``DataFrame.ewm`` result to an ``ExponentialMovingWindow`` helper."""
    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def verify(gb: ExponentialMovingWindow):
        check(gb, ExponentialMovingWindow)

    weighted_window = frame.ewm(2)
    verify(weighted_window)


def test_ewm_groubpy() -> None:
    """Pass a groupby ``ewm`` result to an ``ExponentialMovingWindowGroupby`` helper."""
    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def verify(gb: ExponentialMovingWindowGroupby):
        check(gb, ExponentialMovingWindowGroupby)

    grouped = frame.groupby("B")
    verify(grouped.ewm(2))


def test_json_reader() -> None:
    """Write a frame to JSON, reopen it in chunks and check the reader's class.

    ``DataFrame.to_json`` with a path is asserted to return ``None``. The file
    is then opened with ``read_json(..., chunksize=1, lines=True)`` and the
    resulting object is passed to a helper annotated with ``JsonReader``.
    """
    df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def f1(gb: JsonReader):
        check(gb, JsonReader)

    with ensure_clean() as path:
        check(assert_type(df.to_json(path), None), type(None))
        json_reader = read_json(path, chunksize=1, lines=True)
        try:
            f1(json_reader)
        finally:
            # Close the reader even when the check raises, so the open handle
            # is released before the ``ensure_clean`` block is exited.
            json_reader.close()


def test_resampler() -> None:
    """Pass a ``Series.resample`` result to a ``Resampler``-typed helper."""
    seconds = pd.date_range("20130101", periods=5, freq="s")
    series = pd.Series([1, 2, 3, 4, 5], index=seconds)

    def verify(gb: Resampler):
        check(gb, Resampler)

    verify(series.resample("3min"))


def test_rolling() -> None:
    """Pass a ``DataFrame.rolling`` result to a ``Rolling``-typed helper."""
    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def verify(gb: Rolling):
        check(gb, Rolling)

    rolling_window = frame.rolling(2)
    verify(rolling_window)


def test_rolling_groupby() -> None:
    """Pass a groupby ``rolling`` result to a ``RollingGroupby``-typed helper."""
    frame = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

    def verify(gb: RollingGroupby):
        check(gb, RollingGroupby)

    grouped = frame.groupby("B")
    verify(grouped.rolling(2))


def test_timegrouper() -> None:
    """Pass a ``pd.Grouper`` built with a ``freq`` to a ``TimeGrouper``-typed helper."""

    def verify(gb: TimeGrouper):
        check(gb, TimeGrouper)

    weekly = pd.Grouper(key="Publish date", freq="1W")
    verify(weekly)


def test_window() -> None:
    """Pass a ``Series.rolling`` result with a ``win_type`` to a ``Window`` helper."""
    series = pd.Series([0, 1, 5, 2, 8])

    def verify(gb: Window):
        check(gb, Window)

    gaussian_window = series.rolling(2, win_type="gaussian")
    verify(gaussian_window)


def test_statereader() -> None:
    """Write a frame to a Stata file, reopen it and check the reader's class.

    The frame is written with an explicit time stamp and variable labels; the
    file is then opened with ``StataReader`` as a context manager and the
    reader is passed to a helper annotated with ``StataReader``.
    """
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=["col_1", "col_2"])
    written_at = pd.Timestamp(2000, 2, 29, 14, 21)
    labels = {"col_1": "This is an example"}

    def verify(gb: StataReader):
        check(gb, StataReader)

    with ensure_clean() as path:
        frame.to_stata(
            path,
            time_stamp=written_at,
            variable_labels=labels,
            version=None,
        )

        with StataReader(path) as reader:
            verify(reader)

0 comments on commit d4d399e

Please sign in to comment.