diff --git a/pandas-stubs/api/extensions/__init__.pyi b/pandas-stubs/api/extensions/__init__.pyi index c0971d51..41378eb7 100644 --- a/pandas-stubs/api/extensions/__init__.pyi +++ b/pandas-stubs/api/extensions/__init__.pyi @@ -3,7 +3,6 @@ from pandas.core.accessor import ( register_index_accessor as register_index_accessor, register_series_accessor as register_series_accessor, ) -from pandas.core.algorithms import take as take from pandas.core.arrays import ( ExtensionArray as ExtensionArray, ExtensionScalarOpsMixin as ExtensionScalarOpsMixin, diff --git a/pandas-stubs/core/algorithms.pyi b/pandas-stubs/core/algorithms.pyi index c628d885..72e907cc 100644 --- a/pandas-stubs/core/algorithms.pyi +++ b/pandas-stubs/core/algorithms.pyi @@ -1,74 +1,40 @@ -from typing import Any +from typing import ( + Any, + overload, +) import numpy as np -from pandas.core.indexes.base import Index - -def unique(values): ... - -unique1d = unique - -def isin(comps, values) -> np.ndarray: ... +from pandas import ( + Categorical, + Index, + Series, +) +from pandas.api.extensions import ExtensionArray + +from pandas._typing import AnyArrayLike + +@overload +def unique(values: Index) -> Index: ... +@overload +def unique(values: Categorical) -> Categorical: ... +@overload +def unique(values: Series) -> np.ndarray | ExtensionArray: ... +@overload +def unique(values: np.ndarray | list) -> np.ndarray: ... +@overload +def unique(values: ExtensionArray) -> ExtensionArray: ... def factorize( values: Any, sort: bool = ..., - na_sentinel: int = ..., + na_sentinel: int | None = ..., + use_na_sentinel: bool = ..., size_hint: int | None = ..., ) -> tuple[np.ndarray, np.ndarray | Index]: ... def value_counts( - values, + values: AnyArrayLike | list | tuple, sort: bool = ..., ascending: bool = ..., normalize: bool = ..., - bins=..., + bins: int | None = ..., dropna: bool = ..., ) -> Series: ... -def duplicated(values, keep=...) -> np.ndarray: ... -def mode(values, dropna: bool = ...) -> Series: ... -def rank( - values, - axis: int = ..., - method: str = ..., - na_option: str = ..., - ascending: bool = ..., - pct: bool = ..., -): ... -def checked_add_with_arr(arr, b, arr_mask=..., b_mask=...): ... -def quantile(x, q, interpolation_method: str = ...): ... - -class SelectN: - obj = ... - n = ... - keep = ... - def __init__(self, obj, n: int, keep: str) -> None: ... - def nlargest(self): ... - def nsmallest(self): ... - @staticmethod - def is_valid_dtype_n_method(dtype) -> bool: ... - -class SelectNSeries(SelectN): - def compute(self, method): ... - -class SelectNFrame(SelectN): - columns = ... - def __init__(self, obj, n: int, keep: str, columns) -> None: ... - def compute(self, method): ... - -def take(arr, indices, axis: int = ..., allow_fill: bool = ..., fill_value=...): ... -def take_nd( - arr, indexer, axis: int = ..., out=..., fill_value=..., allow_fill: bool = ... -): ... - -take_1d = take_nd - -def take_2d_multi(arr, indexer, fill_value=...): ... -def searchsorted(arr, value, side: str = ..., sorter=...): ... -def diff(arr, n: int, axis: int = ..., stacklevel=...): ... -def safe_sort( - values, - codes=..., - na_sentinel: int = ..., - assume_unique: bool = ..., - verify: bool = ..., -) -> np.ndarray | tuple[np.ndarray, np.ndarray]: ... - -from pandas import Series diff --git a/pandas-stubs/core/apply.pyi b/pandas-stubs/core/apply.pyi deleted file mode 100644 index f6829347..00000000 --- a/pandas-stubs/core/apply.pyi +++ /dev/null @@ -1,101 +0,0 @@ -import abc -from typing import ( - Any, - Iterator, -) - -ResType = dict[int, Any] - -def frame_apply( - obj: DataFrame, - func, - axis=..., - raw: bool = ..., - result_type=..., - ignore_failures: bool = ..., - args=..., - kwds=..., -): ... - -class FrameApply(metaclass=abc.ABCMeta): - axis: int - @property - @abc.abstractmethod - def result_index(self) -> Index: ... - @property - @abc.abstractmethod - def result_columns(self) -> Index: ... - @property - @abc.abstractmethod - def series_generator(self) -> Iterator[Series]: ... - @abc.abstractmethod - def wrap_results_for_axis( - self, results: ResType, res_index: Index - ) -> Series | DataFrame: ... - obj = ... - raw = ... - ignore_failures = ... - args = ... - kwds = ... - result_type = ... - f = ... - def __init__( - self, - obj: DataFrame, - func, - raw: bool, - result_type, - ignore_failures: bool, - args, - kwds, - ): ... - @property - def res_columns(self) -> Index: ... - @property - def columns(self) -> Index: ... - @property - def index(self) -> Index: ... - def values(self): ... - def dtypes(self) -> Series: ... - @property - def agg_axis(self) -> Index: ... - def get_result(self): ... - def apply_empty_result(self): ... - def apply_raw(self): ... - def apply_broadcast(self, target: DataFrame) -> DataFrame: ... - def apply_standard(self): ... - def apply_series_generator(self) -> tuple[ResType, Index]: ... - def wrap_results( - self, results: ResType, res_index: Index - ) -> Series | DataFrame: ... - -class FrameRowApply(FrameApply): - axis: int = ... - def apply_broadcast(self, target: DataFrame) -> DataFrame: ... - @property - def series_generator(self): ... - @property - def result_index(self) -> Index: ... - @property - def result_columns(self) -> Index: ... - def wrap_results_for_axis( - self, results: ResType, res_index: Index - ) -> DataFrame: ... - -class FrameColumnApply(FrameApply): - axis: int = ... - def apply_broadcast(self, target: DataFrame) -> DataFrame: ... - @property - def series_generator(self): ... - @property - def result_index(self) -> Index: ... - @property - def result_columns(self) -> Index: ... - def wrap_results_for_axis( - self, results: ResType, res_index: Index - ) -> Series | DataFrame: ... - def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: ... - -from pandas.core.frame import DataFrame -from pandas.core.indexes.base import Index -from pandas.core.series import Series diff --git a/pandas-stubs/core/nanops.pyi b/pandas-stubs/core/nanops.pyi deleted file mode 100644 index d649d326..00000000 --- a/pandas-stubs/core/nanops.pyi +++ /dev/null @@ -1,46 +0,0 @@ -bn = ... - -def set_use_bottleneck(v: bool = ...) -> None: ... - -class disallow: - dtypes = ... - def __init__(self, *dtypes) -> None: ... - def check(self, obj) -> bool: ... - def __call__(self, f): ... - -class bottleneck_switch: - name = ... - kwargs = ... - def __init__(self, name=..., **kwargs) -> None: ... - def __call__(self, alt): ... - -def nanany(values, axis=..., skipna: bool = ..., mask=...): ... -def nanall(values, axis=..., skipna: bool = ..., mask=...): ... -def nansum(values, axis=..., skipna: bool = ..., min_count: int = ..., mask=...): ... -def nanmean(values, axis=..., skipna: bool = ..., mask=...): ... -def nanmedian(values, axis=..., skipna: bool = ..., mask=...): ... -def nanstd(values, axis=..., skipna: bool = ..., ddof: int = ..., mask=...): ... -def nanvar(values, axis=..., skipna: bool = ..., ddof: int = ..., mask=...): ... -def nansem(values, axis=..., skipna: bool = ..., ddof: int = ..., mask=...): ... - -nanmin = ... -nanmax = ... - -def nanargmax(values, axis=..., skipna: bool = ..., mask=...): ... -def nanargmin(values, axis=..., skipna: bool = ..., mask=...): ... -def nanskew(values, axis=..., skipna: bool = ..., mask=...): ... -def nankurt(values, axis=..., skipna: bool = ..., mask=...): ... -def nanprod(values, axis=..., skipna: bool = ..., min_count: int = ..., mask=...): ... -def nancorr(a, b, method: str = ..., min_periods=...): ... -def get_corr_func(method): ... -def nancov(a, b, min_periods=...): ... -def make_nancomp(op): ... - -nangt = ... -nange = ... -nanlt = ... -nanle = ... -naneq = ... -nanne = ... - -def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): ... diff --git a/pandas-stubs/core/sorting.pyi b/pandas-stubs/core/sorting.pyi deleted file mode 100644 index 76c15909..00000000 --- a/pandas-stubs/core/sorting.pyi +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Any - -def get_group_index(labels: Any, shape: Any, sort: bool, xnull: bool) -> Any: ... -def get_compressed_ids(labels: Any, sizes: Any): ... -def is_int64_overflow_possible(shape: Any) -> bool: ... -def decons_group_index(comp_labels: Any, shape: Any): ... -def decons_obs_group_ids( - comp_ids: Any, obs_ids: Any, shape: Any, labels: Any, xnull: bool -) -> Any: ... -def indexer_from_factorized(labels: Any, shape: Any, compress: bool = ...) -> Any: ... -def lexsort_indexer(keys: Any, orders: Any = ..., na_position: str = ...) -> Any: ... -def nargsort( - items: Any, kind: str = ..., ascending: bool = ..., na_position: str = ... -) -> Any: ... - -class _KeyMapper: - levels: Any = ... - labels: Any = ... - comp_ids: Any = ... - k: Any = ... - tables: Any = ... - def __init__( - self, comp_ids: Any, ngroups: int, levels: Any, labels: Any - ) -> None: ... - def get_key(self, comp_id: Any): ... - -def get_flattened_iterator(comp_ids: Any, ngroups: Any, levels: Any, labels: Any): ... -def get_indexer_dict(label_list: Any, keys: Any): ... -def get_group_index_sorter(group_index: Any, ngroups: int) -> Any: ... -def compress_group_index(group_index: Any, sort: bool = ...) -> Any: ... diff --git a/tests/test_pandas.py b/tests/test_pandas.py index e1529711..0657579c 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -4,10 +4,12 @@ from typing import ( TYPE_CHECKING, Any, + Union, ) import numpy as np import pandas as pd +from pandas.api.extensions import ExtensionArray import pytest from typing_extensions import assert_type @@ -181,3 +183,91 @@ def test_read_xml() -> None: ), pd.DataFrame, ) + + +def test_unique() -> None: + # Taken from the docs + check( + assert_type( + pd.unique(pd.Series([2, 1, 3, 3])), Union[np.ndarray, ExtensionArray] + ), + np.ndarray, + ) + + check( + assert_type( + pd.unique(pd.Series([2] + [1] * 5)), Union[np.ndarray, ExtensionArray] + ), + np.ndarray, + ) + + check( + assert_type( + pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])), + Union[np.ndarray, ExtensionArray], + ), + np.ndarray, + ) + + check( + assert_type( + pd.unique( + pd.Series( + [ + pd.Timestamp("20160101", tz="US/Eastern"), + pd.Timestamp("20160101", tz="US/Eastern"), + ] + ) + ), + Union[np.ndarray, ExtensionArray], + ), + pd.arrays.DatetimeArray, + ) + check( + assert_type( + pd.unique( + pd.Index( + [ + pd.Timestamp("20160101", tz="US/Eastern"), + pd.Timestamp("20160101", tz="US/Eastern"), + ] + ) + ), + pd.Index, + ), + pd.DatetimeIndex, + ) + + check(assert_type(pd.unique(list("baabc")), np.ndarray), np.ndarray) + + check( + assert_type( + pd.unique(pd.Series(pd.Categorical(list("baabc")))), + Union[np.ndarray, ExtensionArray], + ), + pd.Categorical, + ) + check( + assert_type( + pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))), + Union[np.ndarray, ExtensionArray], + ), + pd.Categorical, + ) + check( + assert_type( + pd.unique( + pd.Series( + pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) + ) + ), + Union[np.ndarray, ExtensionArray], + ), + pd.Categorical, + ) + check( + assert_type( + pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]), np.ndarray + ), + np.ndarray, + )