CLEAN: remove nonpublic stubs (#170)

* CLEAN: remove nonpublic stubs
* put back pd.value_counts()
pandas-dev · Jul 27, 2022 · 74f2b5a
1 parent 6525678
commit 74f2b5a
Show file tree

Hide file tree

Showing 6 changed files with 117 additions and 239 deletions.
diff --git a/pandas-stubs/api/extensions/__init__.pyi b/pandas-stubs/api/extensions/__init__.pyi
@@ -3,7 +3,6 @@ from pandas.core.accessor import (
     register_index_accessor as register_index_accessor,
     register_series_accessor as register_series_accessor,
 )
-from pandas.core.algorithms import take as take
 from pandas.core.arrays import (
     ExtensionArray as ExtensionArray,
     ExtensionScalarOpsMixin as ExtensionScalarOpsMixin,

diff --git a/pandas-stubs/core/algorithms.pyi b/pandas-stubs/core/algorithms.pyi
@@ -1,74 +1,40 @@
-from typing import Any
+from typing import (
+    Any,
+    overload,
+)
 
 import numpy as np
-from pandas.core.indexes.base import Index
-
-def unique(values): ...
-
-unique1d = unique
-
-def isin(comps, values) -> np.ndarray: ...
+from pandas import (
+    Categorical,
+    Index,
+    Series,
+)
+from pandas.api.extensions import ExtensionArray
+
+from pandas._typing import AnyArrayLike
+
+@overload
+def unique(values: Index) -> Index: ...
+@overload
+def unique(values: Categorical) -> Categorical: ...
+@overload
+def unique(values: Series) -> np.ndarray | ExtensionArray: ...
+@overload
+def unique(values: np.ndarray | list) -> np.ndarray: ...
+@overload
+def unique(values: ExtensionArray) -> ExtensionArray: ...
 def factorize(
     values: Any,
     sort: bool = ...,
-    na_sentinel: int = ...,
+    na_sentinel: int | None = ...,
+    use_na_sentinel: bool = ...,
     size_hint: int | None = ...,
 ) -> tuple[np.ndarray, np.ndarray | Index]: ...
 def value_counts(
-    values,
+    values: AnyArrayLike | list | tuple,
     sort: bool = ...,
     ascending: bool = ...,
     normalize: bool = ...,
-    bins=...,
+    bins: int | None = ...,
     dropna: bool = ...,
 ) -> Series: ...
-def duplicated(values, keep=...) -> np.ndarray: ...
-def mode(values, dropna: bool = ...) -> Series: ...
-def rank(
-    values,
-    axis: int = ...,
-    method: str = ...,
-    na_option: str = ...,
-    ascending: bool = ...,
-    pct: bool = ...,
-): ...
-def checked_add_with_arr(arr, b, arr_mask=..., b_mask=...): ...
-def quantile(x, q, interpolation_method: str = ...): ...
-
-class SelectN:
-    obj = ...
-    n = ...
-    keep = ...
-    def __init__(self, obj, n: int, keep: str) -> None: ...
-    def nlargest(self): ...
-    def nsmallest(self): ...
-    @staticmethod
-    def is_valid_dtype_n_method(dtype) -> bool: ...
-
-class SelectNSeries(SelectN):
-    def compute(self, method): ...
-
-class SelectNFrame(SelectN):
-    columns = ...
-    def __init__(self, obj, n: int, keep: str, columns) -> None: ...
-    def compute(self, method): ...
-
-def take(arr, indices, axis: int = ..., allow_fill: bool = ..., fill_value=...): ...
-def take_nd(
-    arr, indexer, axis: int = ..., out=..., fill_value=..., allow_fill: bool = ...
-): ...
-
-take_1d = take_nd
-
-def take_2d_multi(arr, indexer, fill_value=...): ...
-def searchsorted(arr, value, side: str = ..., sorter=...): ...
-def diff(arr, n: int, axis: int = ..., stacklevel=...): ...
-def safe_sort(
-    values,
-    codes=...,
-    na_sentinel: int = ...,
-    assume_unique: bool = ...,
-    verify: bool = ...,
-) -> np.ndarray | tuple[np.ndarray, np.ndarray]: ...
-
-from pandas import Series
diff --git a/pandas-stubs/core/apply.pyi b/pandas-stubs/core/apply.pyi
diff --git a/pandas-stubs/core/nanops.pyi b/pandas-stubs/core/nanops.pyi
diff --git a/pandas-stubs/core/sorting.pyi b/pandas-stubs/core/sorting.pyi
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -4,10 +4,12 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Union,
 )
 
 import numpy as np
 import pandas as pd
+from pandas.api.extensions import ExtensionArray
 import pytest
 from typing_extensions import assert_type
 
@@ -181,3 +183,91 @@ def test_read_xml() -> None:
             ),
             pd.DataFrame,
         )
+
+
+def test_unique() -> None:
+    # Taken from the docs
+    check(
+        assert_type(
+            pd.unique(pd.Series([2, 1, 3, 3])), Union[np.ndarray, ExtensionArray]
+        ),
+        np.ndarray,
+    )
+
+    check(
+        assert_type(
+            pd.unique(pd.Series([2] + [1] * 5)), Union[np.ndarray, ExtensionArray]
+        ),
+        np.ndarray,
+    )
+
+    check(
+        assert_type(
+            pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])),
+            Union[np.ndarray, ExtensionArray],
+        ),
+        np.ndarray,
+    )
+
+    check(
+        assert_type(
+            pd.unique(
+                pd.Series(
+                    [
+                        pd.Timestamp("20160101", tz="US/Eastern"),
+                        pd.Timestamp("20160101", tz="US/Eastern"),
+                    ]
+                )
+            ),
+            Union[np.ndarray, ExtensionArray],
+        ),
+        pd.arrays.DatetimeArray,
+    )
+    check(
+        assert_type(
+            pd.unique(
+                pd.Index(
+                    [
+                        pd.Timestamp("20160101", tz="US/Eastern"),
+                        pd.Timestamp("20160101", tz="US/Eastern"),
+                    ]
+                )
+            ),
+            pd.Index,
+        ),
+        pd.DatetimeIndex,
+    )
+
+    check(assert_type(pd.unique(list("baabc")), np.ndarray), np.ndarray)
+
+    check(
+        assert_type(
+            pd.unique(pd.Series(pd.Categorical(list("baabc")))),
+            Union[np.ndarray, ExtensionArray],
+        ),
+        pd.Categorical,
+    )
+    check(
+        assert_type(
+            pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))),
+            Union[np.ndarray, ExtensionArray],
+        ),
+        pd.Categorical,
+    )
+    check(
+        assert_type(
+            pd.unique(
+                pd.Series(
+                    pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
+                )
+            ),
+            Union[np.ndarray, ExtensionArray],
+        ),
+        pd.Categorical,
+    )
+    check(
+        assert_type(
+            pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]), np.ndarray
+        ),
+        np.ndarray,
+    )