Skip to content

Commit

Permalink
DEPS: Test NEP 50 (pandas-dev#55739)
Browse files Browse the repository at this point in the history
* DEPS: Test NEP 50

* Use Python floats in test_maybe_promote_float_with_float

* Refactor test_to_html_multiindex to allow tests to collect

* Suppress DeprecationWarning for now

* Use old invocation

* Use Python ints in _range.py functions

* Address test_constructor

* Fix test_constructor_coercion_signed_to_unsigned

* Fix test_constructor_coercion_signed_to_unsigned

* Cast numpy scalars as python scalars before arith ops

* add xfail reason to TestCoercionFloat32

* only set promotion state for numpy > 2.0

* order was backwards

* Version promotion state call

* fix timedelta tests

* go for green

* fix non npdev too?

* fixes

* adjust xfail condition

* go for green

* add tests

* add negative numbers test

* updates

* fix accidental changes

* more

* simplify

* linter

---------

Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com>
  • Loading branch information
mroeschke and lithomas1 committed Dec 22, 2023
1 parent 0d8a0f3 commit e0e47e8
Show file tree
Hide file tree
Showing 15 changed files with 177 additions and 39 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
- name: "Numpy Dev"
env_file: actions-311-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
test_args: "-W error::FutureWarning"
- name: "Pyarrow Nightly"
env_file: actions-311-pyarrownightly.yaml
pattern: "not slow and not network and not single_cpu"
Expand All @@ -115,6 +115,7 @@ jobs:
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
concurrency:
Expand Down
3 changes: 1 addition & 2 deletions ci/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED

COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml"

# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE
PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"

if [[ "$PATTERN" ]]; then
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
Expand Down
12 changes: 12 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2060,6 +2060,12 @@ class Timedelta(_Timedelta):
# integers or floats
if util.is_nan(other):
return NaT
# We want NumPy numeric scalars to behave like Python scalars
# post NEP 50
if isinstance(other, cnp.integer):
other = int(other)
if isinstance(other, cnp.floating):
other = float(other)
return Timedelta._from_value_and_reso(
<int64_t>(self._value/ other), self._creso
)
Expand Down Expand Up @@ -2114,6 +2120,12 @@ class Timedelta(_Timedelta):
elif is_integer_object(other) or is_float_object(other):
if util.is_nan(other):
return NaT
# We want NumPy numeric scalars to behave like Python scalars
# post NEP 50
if isinstance(other, cnp.integer):
other = int(other)
if isinstance(other, cnp.floating):
other = float(other)
return type(self)._from_value_and_reso(self._value// other, self._creso)

elif is_array(other):
Expand Down
14 changes: 7 additions & 7 deletions pandas/core/arrays/_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def generate_regular_range(
iend = end._value if end is not None else None
freq.nanos # raises if non-fixed frequency
td = Timedelta(freq)
b: int | np.int64 | np.uint64
e: int | np.int64 | np.uint64
b: int
e: int
try:
td = td.as_unit(unit, round_ok=False)
except ValueError as err:
Expand Down Expand Up @@ -96,7 +96,7 @@ def generate_regular_range(

def _generate_range_overflow_safe(
endpoint: int, periods: int, stride: int, side: str = "start"
) -> np.int64 | np.uint64:
) -> int:
"""
Calculate the second endpoint for passing to np.arange, checking
to avoid an integer overflow. Catch OverflowError and re-raise
Expand All @@ -115,7 +115,7 @@ def _generate_range_overflow_safe(
Returns
-------
other_end : np.int64 | np.uint64
other_end : int
Raises
------
Expand Down Expand Up @@ -163,7 +163,7 @@ def _generate_range_overflow_safe(

def _generate_range_overflow_safe_signed(
endpoint: int, periods: int, stride: int, side: str
) -> np.int64 | np.uint64:
) -> int:
"""
A special case for _generate_range_overflow_safe where `periods * stride`
can be calculated without overflowing int64 bounds.
Expand All @@ -181,7 +181,7 @@ def _generate_range_overflow_safe_signed(
# Putting this into a DatetimeArray/TimedeltaArray
# would incorrectly be interpreted as NaT
raise OverflowError
return result
return int(result)
except (FloatingPointError, OverflowError):
# with endpoint negative and addend positive we risk
# FloatingPointError; with reversed signed we risk OverflowError
Expand All @@ -200,7 +200,7 @@ def _generate_range_overflow_safe_signed(
i64max = np.uint64(i8max)
assert uresult > i64max
if uresult <= i64max + np.uint64(stride):
return uresult
return int(uresult)

raise OutOfBoundsDatetime(
f"Cannot generate range with {side}={endpoint} and periods={periods}"
Expand Down
39 changes: 34 additions & 5 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
is_supported_dtype,
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas.compat.numpy import np_version_gt2
from pandas.errors import (
IntCastingNaNError,
LossySetitemError,
Expand Down Expand Up @@ -1314,6 +1315,30 @@ def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
# which will make us upcast too far.
if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
right = int(right)
# After NEP 50, numpy won't inspect Python scalars
# TODO: do we need to recreate numpy's inspection logic for floats too?
# (doing so currently breaks some tests)
if isinstance(right, int) and not isinstance(right, np.integer):
# This gives an unsigned type by default
# (if our number is positive)

# If our left dtype is signed, we might not want this since
# this might give us 1 dtype too big
# We should check if the corresponding int dtype (e.g. int64 for uint64)
# can hold the number
right_dtype = np.min_scalar_type(right)
if right == 0:
# Special case 0
right = left_dtype
elif (
not np.issubdtype(left_dtype, np.unsignedinteger)
and 0 < right <= 2 ** (8 * right_dtype.itemsize - 1) - 1
):
# If left dtype isn't unsigned, check if it fits in the signed dtype
right = np.dtype(f"i{right_dtype.itemsize}")
else:
right = right_dtype

new_dtype = np.result_type(left_dtype, right)

elif is_valid_na_for_dtype(right, left_dtype):
Expand Down Expand Up @@ -1619,11 +1644,13 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
with warnings.catch_warnings():
# We already disallow dtype=uint w/ negative numbers
# (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
warnings.filterwarnings(
"ignore",
"NumPy will stop allowing conversion of out-of-bound Python int",
DeprecationWarning,
)
if not np_version_gt2:
warnings.filterwarnings(
"ignore",
"NumPy will stop allowing conversion of "
"out-of-bound Python int",
DeprecationWarning,
)
casted = np.array(arr, dtype=dtype, copy=False)
else:
with warnings.catch_warnings():
Expand Down Expand Up @@ -1660,6 +1687,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
raise ValueError(f"string values cannot be losslessly cast to {dtype}")

if dtype.kind == "u" and (arr < 0).any():
# TODO: can this be hit anymore after numpy 2.0?
raise OverflowError("Trying to coerce negative values to unsigned integers")

if arr.dtype.kind == "f":
Expand All @@ -1672,6 +1700,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
raise ValueError("Trying to coerce float values to integers")

if casted.dtype < arr.dtype:
# TODO: Can this path be hit anymore with numpy > 2
# GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
raise ValueError(
f"Values are too large to be losslessly converted to {dtype}. "
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,14 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
return Timedelta(obj)

# We want NumPy numeric scalars to behave like Python scalars
# post NEP 50
elif isinstance(obj, np.integer):
return int(obj)

elif isinstance(obj, np.floating):
return float(obj)

return obj


Expand Down
16 changes: 8 additions & 8 deletions pandas/tests/dtypes/cast/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,24 +229,24 @@ def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
[
# float filled with float
("float32", 1, "float32"),
("float32", np.finfo("float32").max * 1.1, "float64"),
("float32", float(np.finfo("float32").max) * 1.1, "float64"),
("float64", 1, "float64"),
("float64", np.finfo("float32").max * 1.1, "float64"),
("float64", float(np.finfo("float32").max) * 1.1, "float64"),
# complex filled with float
("complex64", 1, "complex64"),
("complex64", np.finfo("float32").max * 1.1, "complex128"),
("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
("complex128", 1, "complex128"),
("complex128", np.finfo("float32").max * 1.1, "complex128"),
("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
# float filled with complex
("float32", 1 + 1j, "complex64"),
("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
("float64", 1 + 1j, "complex128"),
("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
# complex filled with complex
("complex64", 1 + 1j, "complex64"),
("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
("complex128", 1 + 1j, "complex128"),
("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"),
("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
],
)
def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):
Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@
missing as libmissing,
ops as libops,
)
from pandas.compat.numpy import np_version_gt2

from pandas.core.dtypes import inference
from pandas.core.dtypes.cast import find_result_type
from pandas.core.dtypes.common import (
ensure_int32,
is_bool,
Expand Down Expand Up @@ -1995,3 +1997,51 @@ def test_ensure_int32():
values = np.arange(10, dtype=np.int64)
result = ensure_int32(values)
assert result.dtype == np.int32


@pytest.mark.parametrize(
    "right,result",
    [
        # Each case pairs a scalar ``right`` with the dtype expected when it
        # is combined with a uint8 left dtype.
        (0, np.uint8),
        (-1, np.int16),
        (300, np.uint16),
        # Integer-valued floats are converted to ints first, so 300.0 is
        # expected to behave exactly like 300.
        (300.0, np.uint16),
        # For non-integer floats, we just upcast directly to float64 instead
        # of trying to find a smaller floating dtype.
        (300.1, np.float64),
        # A NumPy scalar keeps its own dtype on NumPy >= 2; older NumPy is
        # expected to pick uint16 here instead.
        (np.int16(300), np.int16 if np_version_gt2 else np.uint16),
    ],
)
def test_find_result_type_uint_int(right, result):
    """Check the dtype ``find_result_type`` returns for uint8 and ``right``."""
    left_dtype = np.dtype("uint8")
    assert find_result_type(left_dtype, right) == result


@pytest.mark.parametrize(
    "right,result",
    [
        # Each case pairs a scalar ``right`` with the dtype expected when it
        # is combined with an int8 left dtype.
        (0, np.int8),
        (-1, np.int8),
        (300, np.int16),
        # Integer-valued floats are converted to ints first, so 300.0 is
        # expected to behave exactly like 300.
        (300.0, np.int16),
        # For non-integer floats, we just upcast directly to float64 instead
        # of trying to find a smaller floating dtype.
        (300.1, np.float64),
        (np.int16(300), np.int16),
    ],
)
def test_find_result_type_int_int(right, result):
    """Check the dtype ``find_result_type`` returns for int8 and ``right``."""
    left_dtype = np.dtype("int8")
    assert find_result_type(left_dtype, right) == result


@pytest.mark.parametrize(
    "right,result",
    [
        # A Python float is expected to give float64; a NumPy float32 scalar
        # is expected to give float32.
        (300.0, np.float64),
        (np.float32(300), np.float32),
    ],
)
def test_find_result_type_floats(right, result):
    """Check the dtype ``find_result_type`` returns for float16 and ``right``."""
    left_dtype = np.dtype("float16")
    assert find_result_type(left_dtype, right) == result
16 changes: 11 additions & 5 deletions pandas/tests/indexes/numeric/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,11 +354,13 @@ def test_constructor(self, dtype):
arr = index.values.copy()
new_index = index_cls(arr, copy=True)
tm.assert_index_equal(new_index, index, exact=True)
val = arr[0] + 3000
val = int(arr[0]) + 3000

# this should not change index
arr[0] = val
assert new_index[0] != val
if dtype != np.int8:
# NEP 50 won't allow assignment that would overflow
arr[0] = val
assert new_index[0] != val

if dtype == np.int64:
# pass list, coerce fine
Expand Down Expand Up @@ -407,8 +409,12 @@ def test_constructor_coercion_signed_to_unsigned(
any_unsigned_int_numpy_dtype,
):
# see gh-15832
msg = "Trying to coerce negative values to unsigned integers"

msg = "|".join(
[
"Trying to coerce negative values to unsigned integers",
"The elements provided in the data cannot all be casted",
]
)
with pytest.raises(OverflowError, match=msg):
Index([-1], dtype=any_unsigned_int_numpy_dtype)

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -226,6 +227,8 @@ def test_insert_int_index(
"insert, coerced_val, coerced_dtype",
[
(1, 1.0, None),
# When float_numpy_dtype=float32, this is not the case
# see the correction below
(1.1, 1.1, np.float64),
(False, False, object), # GH#36319
("x", "x", object),
Expand All @@ -238,6 +241,10 @@ def test_insert_float_index(
obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype

if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
# Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
# the expected dtype will be float32 if the original dtype was float32
coerced_dtype = np.float32
exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas._config import using_pyarrow_string_dtype

from pandas._libs import index as libindex
from pandas.compat.numpy import np_version_gt2
from pandas.errors import IndexingError
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -3020,7 +3021,15 @@ def test_loc_setitem_uint8_upcast(value):
with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
df.loc[2, "col1"] = value # value that can't be held in uint8

expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
if np_version_gt2 and isinstance(value, np.int16):
# Note, result type of uint8 + int16 is int16
# in numpy < 2, though, numpy would inspect the
# value and see that it could fit in an uint16, resulting in a uint16
dtype = "int16"
else:
dtype = "uint16"

expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
tm.assert_frame_equal(df, expected)


Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/formats/test_to_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,15 +419,15 @@ def test_to_html_columns_arg(float_frame):
"columns,justify,expected",
[
(
MultiIndex.from_tuples(
list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))),
MultiIndex.from_arrays(
[np.arange(2).repeat(2), np.mod(range(4), 2)],
names=["CL0", "CL1"],
),
"left",
"multiindex_1",
),
(
MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))),
MultiIndex.from_arrays([np.arange(4), np.mod(range(4), 2)]),
"right",
"multiindex_2",
),
Expand Down
Loading

0 comments on commit e0e47e8

Please sign in to comment.