Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable many complex number tests #54761

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
6125494
Enable many complex number tests
MichaelTiemannOSC Aug 25, 2023
e7a285a
Update v2.1.0.rst
MichaelTiemannOSC Aug 25, 2023
02719d9
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Aug 29, 2023
f9bfeb9
Fix merge error in test_decimal.py
MichaelTiemannOSC Aug 29, 2023
077213f
Simplify test_fillna_no_op_returns_copy
MichaelTiemannOSC Aug 29, 2023
9fda0ef
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Sep 8, 2023
d25baa2
changes from review
MichaelTiemannOSC Sep 8, 2023
ad841bf
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Sep 8, 2023
7535374
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Sep 22, 2023
7ef6052
Use LSP parameter style for request
MichaelTiemannOSC Sep 22, 2023
f1139f5
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 10, 2023
19d3127
Handle complex128 EA in _ensure_data
MichaelTiemannOSC Oct 11, 2023
67e2dbc
Fix mypy pre-commit problems
MichaelTiemannOSC Oct 12, 2023
909ced4
Remove some LSP sigs for _get_expected_exception
MichaelTiemannOSC Oct 13, 2023
48cb330
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 13, 2023
bc96021
Additional `requests` removed; indentation fix
MichaelTiemannOSC Oct 13, 2023
d98e6f0
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 14, 2023
dabaf6f
Keep rval refs alive in StringHashTable._unique
MichaelTiemannOSC Oct 15, 2023
61c9b32
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Nov 4, 2023
6ed24ad
Code review changes
MichaelTiemannOSC Nov 4, 2023
e923878
Fix incomplete removal of `keep_rval_refs`
MichaelTiemannOSC Nov 4, 2023
5efad33
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Dec 9, 2023
51450c8
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Dec 9, 2023
c31b213
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Jan 5, 2024
9473130
Update io.py
MichaelTiemannOSC Jan 5, 2024
a86c896
Update test_numpy.py
MichaelTiemannOSC Jan 5, 2024
de56177
Update test_numpy.py
MichaelTiemannOSC Jan 5, 2024
198a16d
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 5, 2024
554a5c3
Update ops.py
MichaelTiemannOSC Jan 6, 2024
6ddb7f7
Update test_decimal.py
MichaelTiemannOSC Jan 6, 2024
c4a17a7
Further simplifications due to upstream
MichaelTiemannOSC Jan 6, 2024
040c98b
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
3a58f5a
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
29aa747
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
5210c8b
setitem exceptions for complex raise ValueError
MichaelTiemannOSC Jan 9, 2024
9f4bea5
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 16, 2024
be1f02b
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 23, 2024
b3edefa
Update _mixins.py
MichaelTiemannOSC Jan 23, 2024
89ea60b
Incorporate feedback
MichaelTiemannOSC Jan 31, 2024
4dc3bea
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Mar 22, 2024
4e273fa
Update test_sparse.py
MichaelTiemannOSC Mar 22, 2024
abfdedb
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Mar 29, 2024
59b50c9
Update algorithms.py
MichaelTiemannOSC Mar 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
return np.asarray(values)

elif is_complex_dtype(values.dtype):
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]"
# has no attribute "itemsize"
if values.dtype.itemsize in [32, 24, 16, 8]: # type: ignore[union-attr]
# The test suite tests support for complex128; we presume that
# complex64, complex192, and complex256 work as well
return np.asarray(values)
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
return cast(np.ndarray, values)

# datetimelike
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,13 @@ def shift(self, periods: int = 1, fill_value=None) -> Self:
def __setitem__(self, key, value) -> None:
    """Set ``self[key] = value`` on the backing ndarray.

    The key is normalized via ``check_array_indexer`` and the value via
    ``self._validate_setitem_value`` before assignment.

    Raises
    ------
    ValueError
        If the underlying ndarray rejects ``value``.  numpy's TypeError is
        translated to the ValueError that pandas callers expect.
    """
    key = check_array_indexer(self, key)
    value = self._validate_setitem_value(value)
    try:
        self._ndarray[key] = value
    except TypeError as exc:
        # Note: when `self._ndarray.dtype.kind == "c"`, numpy incorrectly
        # complains that `must be real number, not ...` when in reality
        # a complex argument is more likely what's expected.
        # Unpack exc.args so the ValueError carries the original message
        # text rather than the repr of a one-element tuple.
        raise ValueError(*exc.args) from exc
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the comment here is helpful, thanks. it suggests to me that we may want to only catch-and-re-raise in a subset of cases? otherwise we'll be re-raising as ValueError more often than we really want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously @mroeschke questioned whether this would ever be a TypeError in the first place — the above assignment was expected to only ever raise a ValueError. So, instead of trying to also handle TypeError in higher-level code, we translate this "impossible" case into the canonical form that Pandas expects. The comment helps the user understand a completely unhelpful error message that comes from Python. Previously I attempted to edit the error message to something more reasonable, but that was challenged. At the end of the day the question is: how much steering do we want to do for this case vs. just letting the exception raise in the expected way and letting users decipher what was wrong with their code in the first place.


def _validate_setitem_value(self, value):
    """Validate/coerce ``value`` before ``__setitem__`` writes it.

    The base implementation returns ``value`` unchanged; subclasses may
    override to perform dtype-specific validation or coercion.
    """
    return value
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ def _astype_nansafe(
elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
return _astype_float_to_int_nansafe(arr, dtype, copy)

elif np.issubdtype(arr.dtype, np.complexfloating) and is_object_dtype(dtype):
res = arr.astype(dtype, copy=copy)
res[np.isnan(arr)] = np.nan
return res

elif arr.dtype == object:
# if we have a datetime/timedelta array of objects
# then coerce to datetime64[ns] and use DatetimeArray.astype
Expand Down
34 changes: 24 additions & 10 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,16 +993,30 @@ def nanvar(
values = values.copy()
np.putmask(values, mask, 0)

# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
sqr = _ensure_numeric((avg - values) ** 2)
if values.dtype.kind == "c":
MichaelTiemannOSC marked this conversation as resolved.
Show resolved Hide resolved
# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance...
# ...but also,
# see https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar
# which explains why computing the variance of complex numbers
# requires first normalizing the complex differences to magnitudes
avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
deltas = _ensure_numeric(avg - values)
avg_re = np.real(deltas)
avg_im = np.imag(deltas)
sqr = avg_re**2 + avg_im**2
else:
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
sqr = _ensure_numeric((avg - values) ** 2)

if mask is not None:
np.putmask(sqr, mask, 0)
result = sqr.sum(axis=axis, dtype=np.float64) / d
Expand Down
62 changes: 46 additions & 16 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,7 @@ def test_frame_operators_none_to_nan(self):
df = pd.DataFrame({"a": ["a", None, "b"]})
tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]}))

@pytest.mark.parametrize("dtype", ("float", "int64"))
@pytest.mark.parametrize("dtype", ("float", "int64", "complex128"))
def test_frame_operators_empty_like(self, dtype):
# Test for issue #10181
frames = [
Expand Down Expand Up @@ -1101,7 +1101,7 @@ def test_series_divmod_zero(self):
class TestUFuncCompat:
# TODO: add more dtypes
@pytest.mark.parametrize("holder", [Index, RangeIndex, Series])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
def test_ufunc_compat(self, holder, dtype):
box = Series if holder is Series else Index

Expand All @@ -1116,45 +1116,75 @@ def test_ufunc_compat(self, holder, dtype):
tm.assert_equal(result, expected)

# TODO: add more dtypes
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
def test_ufunc_coercions(self, index_or_series, dtype):
idx = index_or_series([1, 2, 3, 4, 5], dtype=dtype, name="x")
box = index_or_series

result = np.sqrt(idx)
assert result.dtype == "f8" and isinstance(result, box)
exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=np.float64)), name="x")
assert isinstance(result, box)
if result.dtype.kind == "c":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel is there something in our type conversion / introspection functions that lets us cast to the nearest inexact data type? If not that might be something we want to do here or in a follow up PR to better handle this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that lets us cast to the nearest inexact data type

I don't think so, no. i expected maybe_promote to do that, but looks like it always gives float64

exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = np.divide(idx, 2.0)
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
assert isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

# _evaluate_numeric_binop
result = idx + 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
MichaelTiemannOSC marked this conversation as resolved.
Show resolved Hide resolved
exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx - 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx * 1.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx / 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

Expand Down Expand Up @@ -1408,7 +1438,7 @@ def test_numeric_compat2_floordiv(self, idx, div, expected):
# __floordiv__
tm.assert_index_equal(idx // div, expected, exact=True)

@pytest.mark.parametrize("dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
@pytest.mark.parametrize("delta", [1, 0, -1])
def test_addsub_arithmetic(self, dtype, delta):
# GH#8142
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/base/dim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pandas.core.dtypes.common import (
is_bool_dtype,
is_complex_dtype,
is_integer_dtype,
)

Expand Down Expand Up @@ -273,6 +274,9 @@ def get_reduction_result_dtype(dtype):
data = data.astype("Float64")
if method == "mean":
tm.assert_extension_array_equal(result, data)
elif is_complex_dtype(data) and method in ["std", "var"]:
# std and var produce real-only results
tm.assert_extension_array_equal(result, data - data, check_dtype=False)
else:
tm.assert_extension_array_equal(result, data - data)

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/base/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
class BaseParsingTests:
@pytest.mark.parametrize("engine", ["c", "python"])
def test_EA_types(self, engine, data, request):
if engine == "c" and data.dtype.kind == "c":
request.node.add_marker(
pytest.mark.xfail(
reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}"
)
)
if isinstance(data.dtype, pd.CategoricalDtype):
# in parsers.pyx _convert_with_dtype there is special-casing for
# Categorical that pre-empts _from_sequence_of_strings
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ def test_setitem_slice_array(self, data):

def test_setitem_scalar_key_sequence_raise(self, data):
    # Assigning a length-2 selection into a single scalar slot must fail.
    subset = data[:5].copy()
    # error messages vary by subclass, so match the empty pattern only
    with pytest.raises(ValueError, match=""):
        subset[0] = subset[[0, 1]]

def test_setitem_preserves_views(self, data):
Expand Down Expand Up @@ -432,7 +433,7 @@ def test_setitem_invalid(self, data, invalid_scalar):
data[:] = invalid_scalar

def test_setitem_2d_values(self, data):
# GH50085
# GH54445
original = data.copy()
df = pd.DataFrame({"a": data, "b": data})
df.loc[[0, 1], :] = df.loc[[1, 0], :].values
Expand Down
43 changes: 39 additions & 4 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
orig_assert_attr_equal(attr, left, right, obj)


@pytest.fixture(params=["float", "object"])
@pytest.fixture(params=["complex", "float", "object"])
def dtype(request):
return NumpyEADtype(np.dtype(request.param))

Expand Down Expand Up @@ -78,7 +78,10 @@ def allow_in_pandas(monkeypatch):
def data(allow_in_pandas, dtype):
if dtype.numpy_dtype == "object":
return pd.Series([(i,) for i in range(100)]).array
return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype))
arr = np.arange(1, 101, dtype=dtype._dtype)
if dtype.kind == "c":
arr = arr + (arr * (0 + 1j))
return NumpyExtensionArray(arr)


@pytest.fixture
Expand Down Expand Up @@ -245,15 +248,15 @@ def test_insert_invalid(self, data, invalid_scalar):

def test_divmod(self, data):
    # divmod is not defined for object or complex dtypes -> TypeError
    self.divmod_exc = TypeError if data.dtype.kind in "Oc" else None
    super().test_divmod(data)

def test_divmod_series_array(self, data):
    ser = pd.Series(data)
    # divmod is not defined for object or complex dtypes -> TypeError
    self.divmod_exc = TypeError if data.dtype.kind in "Oc" else None
    self._check_divmod_op(ser, divmod, data)
Expand All @@ -268,6 +271,13 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request)
)
request.node.add_marker(mark)
series_scalar_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
series_scalar_exc = TypeError
self.series_scalar_exc = series_scalar_exc
super().test_arith_series_with_scalar(data, all_arithmetic_operators)

Expand All @@ -276,6 +286,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
series_array_exc = None
if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
series_array_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
series_array_exc = TypeError
self.series_array_exc = series_array_exc
super().test_arith_series_with_array(data, all_arithmetic_operators)

Expand All @@ -289,6 +306,13 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
)
request.node.add_marker(mark)
frame_scalar_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
frame_scalar_exc = TypeError
self.frame_scalar_exc = frame_scalar_exc
super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

Expand Down Expand Up @@ -328,6 +352,17 @@ def test_fillna_frame(self, data_missing):
# Non-scalar "scalar" values.
super().test_fillna_frame(data_missing)

def test_fillna_no_op_returns_copy(self, data, request):
    # complex dtypes lack a cython fill implementation, so expect failure
    if data.dtype.kind == "c":
        mark = pytest.mark.xfail(
            reason="no cython implementation of "
            f"backfill(ndarray[{data.dtype.name}_t],"
            f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd"
        )
        request.node.add_marker(mark)
    super().test_fillna_no_op_returns_copy(data)

@skip_nested
def test_setitem_invalid(self, data, invalid_scalar):
# object dtype can hold anything, so doesn't raise
Expand Down