DEPS: Test NEP 50 (pandas-dev#55739)

* DEPS: Test NEP 50 * Use Python floats in test_maybe_promote_float_with_float * Refactor test_to_html_multiindex to allow tests to collect * Suppress DeprecationWarning for now * Use old invocation * Use Python ints in _ranges.py functions * Address test_constructor * Fix test_constructor_coercion_signed_to_unsigned * Fix test_constructor_coercion_signed_to_unsigned * Cast numpy scalars as python scalars before arith ops * add xfail reason to TestCoercionFloat32 * only set promotion state for numpy > 2.0 * order was backwards * Version promotion state call * fix timedelta tests * go for green * fix non npdev too? * fixes * adjust xfail condition * go for green * add tests * add negative numbers test * updates * fix accidental changes * more * simplify * linter --------- Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com>
TomAugspurger · Dec 22, 2023 · e0e47e8 · e0e47e8
1 parent 0d8a0f3
commit e0e47e8
Show file tree

Hide file tree

Showing 15 changed files with 177 additions and 39 deletions.
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -92,7 +92,7 @@ jobs:
           - name: "Numpy Dev"
             env_file: actions-311-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
-            test_args: "-W error::DeprecationWarning -W error::FutureWarning"
+            test_args: "-W error::FutureWarning"
           - name: "Pyarrow Nightly"
             env_file: actions-311-pyarrownightly.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -115,6 +115,7 @@ jobs:
       TEST_ARGS: ${{ matrix.test_args || '' }}
       PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
+      NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
       # Clipboard tests
       QT_QPA_PLATFORM: offscreen
     concurrency:

diff --git a/ci/run_tests.sh b/ci/run_tests.sh
@@ -10,8 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED
 
 COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml"
 
-# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE
-PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
+PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
 
 if [[ "$PATTERN" ]]; then
   PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -2060,6 +2060,12 @@ class Timedelta(_Timedelta):
             # integers or floats
             if util.is_nan(other):
                 return NaT
+            # We want NumPy numeric scalars to behave like Python scalars
+            # post NEP 50
+            if isinstance(other, cnp.integer):
+                other = int(other)
+            if isinstance(other, cnp.floating):
+                other = float(other)
             return Timedelta._from_value_and_reso(
                 <int64_t>(self._value/ other), self._creso
             )
@@ -2114,6 +2120,12 @@ class Timedelta(_Timedelta):
         elif is_integer_object(other) or is_float_object(other):
             if util.is_nan(other):
                 return NaT
+            # We want NumPy numeric scalars to behave like Python scalars
+            # post NEP 50
+            if isinstance(other, cnp.integer):
+                other = int(other)
+            if isinstance(other, cnp.floating):
+                other = float(other)
             return type(self)._from_value_and_reso(self._value// other, self._creso)
 
         elif is_array(other):

diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py
@@ -54,8 +54,8 @@ def generate_regular_range(
     iend = end._value if end is not None else None
     freq.nanos  # raises if non-fixed frequency
     td = Timedelta(freq)
-    b: int | np.int64 | np.uint64
-    e: int | np.int64 | np.uint64
+    b: int
+    e: int
     try:
         td = td.as_unit(unit, round_ok=False)
     except ValueError as err:
@@ -96,7 +96,7 @@ def generate_regular_range(
 
 def _generate_range_overflow_safe(
     endpoint: int, periods: int, stride: int, side: str = "start"
-) -> np.int64 | np.uint64:
+) -> int:
     """
     Calculate the second endpoint for passing to np.arange, checking
     to avoid an integer overflow.  Catch OverflowError and re-raise
@@ -115,7 +115,7 @@ def _generate_range_overflow_safe(
 
     Returns
     -------
-    other_end : np.int64 | np.uint64
+    other_end : int
 
     Raises
     ------
@@ -163,7 +163,7 @@ def _generate_range_overflow_safe(
 
 def _generate_range_overflow_safe_signed(
     endpoint: int, periods: int, stride: int, side: str
-) -> np.int64 | np.uint64:
+) -> int:
     """
     A special case for _generate_range_overflow_safe where `periods * stride`
     can be calculated without overflowing int64 bounds.
@@ -181,7 +181,7 @@ def _generate_range_overflow_safe_signed(
                 # Putting this into a DatetimeArray/TimedeltaArray
                 #  would incorrectly be interpreted as NaT
                 raise OverflowError
-            return result
+            return int(result)
         except (FloatingPointError, OverflowError):
             # with endpoint negative and addend positive we risk
             #  FloatingPointError; with reversed signed we risk OverflowError
@@ -200,7 +200,7 @@ def _generate_range_overflow_safe_signed(
             i64max = np.uint64(i8max)
             assert uresult > i64max
             if uresult <= i64max + np.uint64(stride):
-                return uresult
+                return int(uresult)
 
     raise OutOfBoundsDatetime(
         f"Cannot generate range with {side}={endpoint} and periods={periods}"

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -39,6 +39,7 @@
     is_supported_dtype,
 )
 from pandas._libs.tslibs.timedeltas import array_to_timedelta64
+from pandas.compat.numpy import np_version_gt2
 from pandas.errors import (
     IntCastingNaNError,
     LossySetitemError,
@@ -1314,6 +1315,30 @@ def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
         #  which will make us upcast too far.
         if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
             right = int(right)
+        # After NEP 50, numpy won't inspect Python scalars
+        # TODO: do we need to recreate numpy's inspection logic for floats too
+        # (this breaks some tests)
+        if isinstance(right, int) and not isinstance(right, np.integer):
+            # This gives an unsigned type by default
+            # (if our number is positive)
+
+            # If our left dtype is signed, we might not want this since
+            # this might give us 1 dtype too big
+            # We should check if the corresponding int dtype (e.g. int64 for uint64)
+            # can hold the number
+            right_dtype = np.min_scalar_type(right)
+            if right == 0:
+                # Special case 0
+                right = left_dtype
+            elif (
+                not np.issubdtype(left_dtype, np.unsignedinteger)
+                and 0 < right <= 2 ** (8 * right_dtype.itemsize - 1) - 1
+            ):
+                # If left dtype isn't unsigned, check if it fits in the signed dtype
+                right = np.dtype(f"i{right_dtype.itemsize}")
+            else:
+                right = right_dtype
+
         new_dtype = np.result_type(left_dtype, right)
 
     elif is_valid_na_for_dtype(right, left_dtype):
@@ -1619,11 +1644,13 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
             with warnings.catch_warnings():
                 # We already disallow dtype=uint w/ negative numbers
                 # (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
-                warnings.filterwarnings(
-                    "ignore",
-                    "NumPy will stop allowing conversion of out-of-bound Python int",
-                    DeprecationWarning,
-                )
+                if not np_version_gt2:
+                    warnings.filterwarnings(
+                        "ignore",
+                        "NumPy will stop allowing conversion of "
+                        "out-of-bound Python int",
+                        DeprecationWarning,
+                    )
                 casted = np.array(arr, dtype=dtype, copy=False)
         else:
             with warnings.catch_warnings():
@@ -1660,6 +1687,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
         raise ValueError(f"string values cannot be losslessly cast to {dtype}")
 
     if dtype.kind == "u" and (arr < 0).any():
+        # TODO: can this be hit anymore after numpy 2.0?
         raise OverflowError("Trying to coerce negative values to unsigned integers")
 
     if arr.dtype.kind == "f":
@@ -1672,6 +1700,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
         raise ValueError("Trying to coerce float values to integers")
 
     if casted.dtype < arr.dtype:
+        # TODO: Can this path be hit anymore with numpy > 2
         # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
         raise ValueError(
             f"Values are too large to be losslessly converted to {dtype}. "

diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
@@ -570,6 +570,14 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
         #  np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
         return Timedelta(obj)
 
+    # We want NumPy numeric scalars to behave like Python scalars
+    # post NEP 50
+    elif isinstance(obj, np.integer):
+        return int(obj)
+
+    elif isinstance(obj, np.floating):
+        return float(obj)
+
     return obj
 
 

diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py
@@ -229,24 +229,24 @@ def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
     [
         # float filled with float
         ("float32", 1, "float32"),
-        ("float32", np.finfo("float32").max * 1.1, "float64"),
+        ("float32", float(np.finfo("float32").max) * 1.1, "float64"),
         ("float64", 1, "float64"),
-        ("float64", np.finfo("float32").max * 1.1, "float64"),
+        ("float64", float(np.finfo("float32").max) * 1.1, "float64"),
         # complex filled with float
         ("complex64", 1, "complex64"),
-        ("complex64", np.finfo("float32").max * 1.1, "complex128"),
+        ("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
         ("complex128", 1, "complex128"),
-        ("complex128", np.finfo("float32").max * 1.1, "complex128"),
+        ("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
         # float filled with complex
         ("float32", 1 + 1j, "complex64"),
-        ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"),
+        ("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
         ("float64", 1 + 1j, "complex128"),
-        ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
+        ("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
         # complex filled with complex
         ("complex64", 1 + 1j, "complex64"),
-        ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
+        ("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
         ("complex128", 1 + 1j, "complex128"),
-        ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"),
+        ("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
     ],
 )
 def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -33,8 +33,10 @@
     missing as libmissing,
     ops as libops,
 )
+from pandas.compat.numpy import np_version_gt2
 
 from pandas.core.dtypes import inference
+from pandas.core.dtypes.cast import find_result_type
 from pandas.core.dtypes.common import (
     ensure_int32,
     is_bool,
@@ -1995,3 +1997,51 @@ def test_ensure_int32():
     values = np.arange(10, dtype=np.int64)
     result = ensure_int32(values)
     assert result.dtype == np.int32
+
+
+@pytest.mark.parametrize(
+    "right,result",
+    [
+        (0, np.uint8),
+        (-1, np.int16),
+        (300, np.uint16),
+        # For floats, we just upcast directly to float64 instead of trying to
+        # find a smaller floating dtype
+        (300.0, np.uint16),  # for integer floats, we convert them to ints
+        (300.1, np.float64),
+        (np.int16(300), np.int16 if np_version_gt2 else np.uint16),
+    ],
+)
+def test_find_result_type_uint_int(right, result):
+    left_dtype = np.dtype("uint8")
+    assert find_result_type(left_dtype, right) == result
+
+
+@pytest.mark.parametrize(
+    "right,result",
+    [
+        (0, np.int8),
+        (-1, np.int8),
+        (300, np.int16),
+        # For floats, we just upcast directly to float64 instead of trying to
+        # find a smaller floating dtype
+        (300.0, np.int16),  # for integer floats, we convert them to ints
+        (300.1, np.float64),
+        (np.int16(300), np.int16),
+    ],
+)
+def test_find_result_type_int_int(right, result):
+    left_dtype = np.dtype("int8")
+    assert find_result_type(left_dtype, right) == result
+
+
+@pytest.mark.parametrize(
+    "right,result",
+    [
+        (300.0, np.float64),
+        (np.float32(300), np.float32),
+    ],
+)
+def test_find_result_type_floats(right, result):
+    left_dtype = np.dtype("float16")
+    assert find_result_type(left_dtype, right) == result
diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py
@@ -354,11 +354,13 @@ def test_constructor(self, dtype):
         arr = index.values.copy()
         new_index = index_cls(arr, copy=True)
         tm.assert_index_equal(new_index, index, exact=True)
-        val = arr[0] + 3000
+        val = int(arr[0]) + 3000
 
         # this should not change index
-        arr[0] = val
-        assert new_index[0] != val
+        if dtype != np.int8:
+            # NEP 50 won't allow assignment that would overflow
+            arr[0] = val
+            assert new_index[0] != val
 
         if dtype == np.int64:
             # pass list, coerce fine
@@ -407,8 +409,12 @@ def test_constructor_coercion_signed_to_unsigned(
         any_unsigned_int_numpy_dtype,
     ):
         # see gh-15832
-        msg = "Trying to coerce negative values to unsigned integers"
-
+        msg = "|".join(
+            [
+                "Trying to coerce negative values to unsigned integers",
+                "The elements provided in the data cannot all be casted",
+            ]
+        )
         with pytest.raises(OverflowError, match=msg):
             Index([-1], dtype=any_unsigned_int_numpy_dtype)
 

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -15,6 +15,7 @@
     IS64,
     is_platform_windows,
 )
+from pandas.compat.numpy import np_version_gt2
 
 import pandas as pd
 import pandas._testing as tm
@@ -226,6 +227,8 @@ def test_insert_int_index(
         "insert, coerced_val, coerced_dtype",
         [
             (1, 1.0, None),
+            # When float_numpy_dtype=float32, this is not the case
+            # see the correction below
             (1.1, 1.1, np.float64),
             (False, False, object),  # GH#36319
             ("x", "x", object),
@@ -238,6 +241,10 @@ def test_insert_float_index(
         obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
         coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
 
+        if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
+            # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
+            # the expected dtype will be float32 if the original dtype was float32
+            coerced_dtype = np.float32
         exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
         self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
 

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -15,6 +15,7 @@
 from pandas._config import using_pyarrow_string_dtype
 
 from pandas._libs import index as libindex
+from pandas.compat.numpy import np_version_gt2
 from pandas.errors import IndexingError
 import pandas.util._test_decorators as td
 
@@ -3020,7 +3021,15 @@ def test_loc_setitem_uint8_upcast(value):
     with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
         df.loc[2, "col1"] = value  # value that can't be held in uint8
 
-    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
+    if np_version_gt2 and isinstance(value, np.int16):
+        # Note: the result type of uint8 + int16 is int16.
+        # In numpy < 2, though, numpy would inspect the value and see
+        # that it could fit in a uint16, resulting in a uint16.
+        dtype = "int16"
+    else:
+        dtype = "uint16"
+
+    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
     tm.assert_frame_equal(df, expected)
 
 

diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
@@ -419,15 +419,15 @@ def test_to_html_columns_arg(float_frame):
     "columns,justify,expected",
     [
         (
-            MultiIndex.from_tuples(
-                list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))),
+            MultiIndex.from_arrays(
+                [np.arange(2).repeat(2), np.mod(range(4), 2)],
                 names=["CL0", "CL1"],
             ),
             "left",
             "multiindex_1",
         ),
         (
-            MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))),
+            MultiIndex.from_arrays([np.arange(4), np.mod(range(4), 2)]),
             "right",
             "multiindex_2",
         ),