Commit 70a5b34: CLN: assorted

jbrockmendel committed Oct 2, 2023
1 parent 7e68183 commit 70a5b34
Showing 8 changed files with 59 additions and 15 deletions.
doc/redirects.csv: 1 change (0 additions & 1 deletion)

@@ -127,7 +127,6 @@ generated/pandas.api.types.is_number,../reference/api/pandas.api.types.is_number
 generated/pandas.api.types.is_numeric_dtype,../reference/api/pandas.api.types.is_numeric_dtype
 generated/pandas.api.types.is_object_dtype,../reference/api/pandas.api.types.is_object_dtype
 generated/pandas.api.types.is_period_dtype,../reference/api/pandas.api.types.is_period_dtype
-generated/pandas.api.types.is_period,../reference/api/pandas.api.types.is_period
 generated/pandas.api.types.is_re_compilable,../reference/api/pandas.api.types.is_re_compilable
 generated/pandas.api.types.is_re,../reference/api/pandas.api.types.is_re
 generated/pandas.api.types.is_scalar,../reference/api/pandas.api.types.is_scalar
doc/source/development/contributing_codebase.rst: 2 changes (1 addition & 1 deletion)

@@ -528,7 +528,7 @@ If a test is known to fail but the manner in which it fails
 is not meant to be captured, use ``pytest.mark.xfail`` It is common to use this method for a test that
 exhibits buggy behavior or a non-implemented feature. If
 the failing test has flaky behavior, use the argument ``strict=False``. This
-will make it so pytest does not fail if the test happens to pass.
+will make it so pytest does not fail if the test happens to pass. Using ``strict=False`` is highly undesirable, please use it only as a last resort.

 Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
 over usage within a test so that the test is appropriately marked during the
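For illustration, a minimal sketch of the usage described in the passage above (test names and issue numbers are hypothetical):

import pytest

# Expected failure whose exact failure mode is not captured:
@pytest.mark.xfail(reason="GH#00000: buggy behavior")
def test_known_bug():
    ...

# Flaky failure: strict=False keeps pytest from erroring when the test
# unexpectedly passes; per the edited docs, a last resort only.
@pytest.mark.xfail(strict=False, reason="flaky on some platforms")
def test_flaky():
    ...

# Marking a single parametrized case via pytest.param:
@pytest.mark.parametrize(
    "box",
    [list, tuple, pytest.param(set, marks=pytest.mark.xfail(reason="GH#00000"))],
)
def test_container(box):
    ...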
doc/source/reference/extensions.rst: 2 changes (2 additions & 0 deletions)

@@ -67,6 +67,8 @@ objects.
    api.extensions.ExtensionArray.ndim
    api.extensions.ExtensionArray.shape
    api.extensions.ExtensionArray.tolist
+   api.extensions.ExtensionArray.transpose
+   api.extensions.ExtensionArray.T

 Additionally, we have some utility methods for ensuring your object
 behaves correctly.
pandas/core/indexes/base.py: 24 changes (13 additions & 11 deletions)

@@ -3501,7 +3501,7 @@ def _intersection(self, other: Index, sort: bool = False):
                 pass
             else:
                 # TODO: algos.unique1d should preserve DTA/TDA
-                if is_numeric_dtype(self):
+                if is_numeric_dtype(self.dtype):
                     # This is faster, because Index.unique() checks for uniqueness
                     # before calculating the unique values.
                     res = algos.unique1d(res_indexer)
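A quick illustration of the equivalence the one-line change above relies on: is_numeric_dtype accepts both array-likes and dtypes, so passing the dtype directly gives the same answer while skipping inspection of the object itself.

import pandas as pd
from pandas.api.types import is_numeric_dtype

idx = pd.Index([1, 2, 3])
assert is_numeric_dtype(idx)        # works on the Index
assert is_numeric_dtype(idx.dtype)  # works on the dtype, more direct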
@@ -5013,7 +5013,10 @@ def _can_use_libjoin(self) -> bool:
         )
         # Exclude index types where the conversion to numpy converts to object dtype,
         # which negates the performance benefit of libjoin
-        # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
+        # Subclasses should override to return False if _get_join_target is
+        # not zero-copy.
+        # TODO: exclude RangeIndex (which allocates memory)?
+        # Doing so seems to break test_concat_datetime_timezone
         return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))

     # --------------------------------------------------------------------
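A hypothetical sketch of the override the new comment asks for; the subclass name is illustrative only, and the real attribute may be cached differently in pandas internals:

class CopyingIndex(Index):
    @property
    def _can_use_libjoin(self) -> bool:
        # This subclass's _get_join_target would materialize a new
        # ndarray, so the libjoin fast path is not zero-copy here.
        return False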
@@ -6169,8 +6172,8 @@ def _get_indexer_non_comparable(
             If doing an inequality check, i.e. method is not None.
         """
         if method is not None:
-            other = _unpack_nested_dtype(target)
-            raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
+            other_dtype = _unpack_nested_dtype(target)
+            raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")

         no_matches = -1 * np.ones(target.shape, dtype=np.intp)
         if unique:
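The rename above (other to other_dtype) reflects that _unpack_nested_dtype now returns a dtype rather than an Index; the raised error is unchanged and should surface along these lines:

import pandas as pd

left = pd.Index(pd.to_datetime(["2016-01-01", "2016-01-02"]))
target = pd.Index([1, 2, 3])
try:
    left.get_indexer(target, method="pad")
except TypeError as err:
    print(err)  # e.g. "Cannot compare dtypes datetime64[ns] and int64"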
@@ -6281,8 +6284,7 @@ def _should_compare(self, other: Index) -> bool:
             # respectively.
             return False

-        other = _unpack_nested_dtype(other)
-        dtype = other.dtype
+        dtype = _unpack_nested_dtype(other)
         return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)

     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
@@ -7585,7 +7587,7 @@ def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
     return names


-def _unpack_nested_dtype(other: Index) -> Index:
+def _unpack_nested_dtype(other: Index) -> DtypeObj:
     """
     When checking if our dtype is comparable with another, we need
     to unpack CategoricalDtype to look at its categories.dtype.
@@ -7596,20 +7598,20 @@ def _unpack_nested_dtype(other: Index) -> Index:
     Returns
     -------
-    Index
+    np.dtype or ExtensionDtype
     """
     dtype = other.dtype
     if isinstance(dtype, CategoricalDtype):
         # If there is ever a SparseIndex, this could get dispatched
         # here too.
-        return dtype.categories
+        return dtype.categories.dtype
     elif isinstance(dtype, ArrowDtype):
         # GH 53617
         import pyarrow as pa

         if pa.types.is_dictionary(dtype.pyarrow_dtype):
-            other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
-    return other
+            other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
+    return other.dtype


 def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
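To make the unpacking concrete, a small sketch of what the updated helper now returns for a categorical index (the Arrow dictionary branch is analogous, with the other[:0] slice ensuring the astype never copies data):

import pandas as pd

ci = pd.CategoricalIndex(["a", "b", "a"])
# Comparability checks look through the categorical wrapper to the
# categories' dtype, which the helper now returns directly:
print(ci.dtype)                   # category
print(ci.dtype.categories.dtype)  # object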
pandas/core/series.py: 3 changes (2 additions & 1 deletion)

@@ -4004,7 +4004,8 @@ def argsort(

         if mask.any():
             # TODO(3.0): once this deprecation is enforced we can call
-            # self.array.argsort directly, which will close GH#43840
+            # self.array.argsort directly, which will close GH#43840 and
+            # GH#12694
             warnings.warn(
                 "The behavior of Series.argsort in the presence of NA values is "
                 "deprecated. In a future version, NA values will be ordered "
pandas/core/sorting.py: 2 changes (1 addition & 1 deletion)

@@ -88,7 +88,7 @@ def get_indexer_indexer(
     # error: Incompatible types in assignment (expression has type
     # "Union[ExtensionArray, ndarray[Any, Any], Index, Series]", variable has
     # type "Index")
-    target = ensure_key_mapped(target, key, levels=level)  # type:ignore[assignment]
+    target = ensure_key_mapped(target, key, levels=level)  # type: ignore[assignment]
    target = target._sort_levels_monotonic()

     if level is not None:
pandas/tests/indexing/test_loc.py: 13 changes (13 additions & 0 deletions)

@@ -2166,6 +2166,19 @@ def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
         result.loc[df.index, "data"] = ser._values
         tm.assert_frame_equal(result, df)

+    def test_loc_setitem_ea_not_full_column(self):
+        # GH#39163
+        df = DataFrame({"A": range(5)})
+
+        val = date_range("2016-01-01", periods=3, tz="US/Pacific")
+
+        df.loc[[0, 1, 2], "B"] = val
+
+        bex = val.append(DatetimeIndex([pd.NaT, pd.NaT], dtype=val.dtype))
+        expected = DataFrame({"A": range(5), "B": bex})
+        assert expected.dtypes["B"] == val.dtype
+        tm.assert_frame_equal(df, expected)
+

 class TestLocCallable:
     def test_frame_loc_getitem_callable(self):
pandas/tests/tslibs/test_npy_units.py: 27 changes (27 additions & 0 deletions, new file)

@@ -0,0 +1,27 @@
+import numpy as np
+
+from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
+from pandas._libs.tslibs.vectorized import is_date_array_normalized
+
+# a datetime64 ndarray which *is* normalized
+day_arr = np.arange(10, dtype="i8").view("M8[D]")
+
+
+class TestIsDateArrayNormalized:
+    def test_is_date_array_normalized_day(self):
+        arr = day_arr
+        abbrev = "D"
+        unit = abbrev_to_npy_unit(abbrev)
+        result = is_date_array_normalized(arr.view("i8"), None, unit)
+        assert result is True
+
+    def test_is_date_array_normalized_seconds(self):
+        abbrev = "s"
+        arr = day_arr.astype(f"M8[{abbrev}]")
+        unit = abbrev_to_npy_unit(abbrev)
+        result = is_date_array_normalized(arr.view("i8"), None, unit)
+        assert result is True
+
+        arr[0] += np.timedelta64(1, abbrev)
+        result2 = is_date_array_normalized(arr.view("i8"), None, unit)
+        assert result2 is False
