Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable many complex number tests #54761

Closed
Closed
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
6125494
Enable many complex number tests
MichaelTiemannOSC Aug 25, 2023
e7a285a
Update v2.1.0.rst
MichaelTiemannOSC Aug 25, 2023
02719d9
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Aug 29, 2023
f9bfeb9
Fix merge error in test_decimal.py
MichaelTiemannOSC Aug 29, 2023
077213f
Simplify test_fillna_no_op_returns_copy
MichaelTiemannOSC Aug 29, 2023
9fda0ef
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Sep 8, 2023
d25baa2
changes from review
MichaelTiemannOSC Sep 8, 2023
ad841bf
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Sep 8, 2023
7535374
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Sep 22, 2023
7ef6052
Use LSP parameter style for request
MichaelTiemannOSC Sep 22, 2023
f1139f5
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 10, 2023
19d3127
Handle complex128 EA in _ensure_data
MichaelTiemannOSC Oct 11, 2023
67e2dbc
Fix mypy pre-commit problems
MichaelTiemannOSC Oct 12, 2023
909ced4
Remove some LSP sigs for _get_expected_exception
MichaelTiemannOSC Oct 13, 2023
48cb330
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 13, 2023
bc96021
Additional `requests` removed; indentation fix
MichaelTiemannOSC Oct 13, 2023
d98e6f0
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 14, 2023
dabaf6f
Keep rval refs alive in StringHashTable._unique
MichaelTiemannOSC Oct 15, 2023
61c9b32
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Nov 4, 2023
6ed24ad
Code review changes
MichaelTiemannOSC Nov 4, 2023
e923878
Fix incomplete removal of `keep_rval_refs`
MichaelTiemannOSC Nov 4, 2023
5efad33
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Dec 9, 2023
51450c8
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Dec 9, 2023
c31b213
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Jan 5, 2024
9473130
Update io.py
MichaelTiemannOSC Jan 5, 2024
a86c896
Update test_numpy.py
MichaelTiemannOSC Jan 5, 2024
de56177
Update test_numpy.py
MichaelTiemannOSC Jan 5, 2024
198a16d
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 5, 2024
554a5c3
Update ops.py
MichaelTiemannOSC Jan 6, 2024
6ddb7f7
Update test_decimal.py
MichaelTiemannOSC Jan 6, 2024
c4a17a7
Further simplifications due to upstream
MichaelTiemannOSC Jan 6, 2024
040c98b
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
3a58f5a
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
29aa747
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
5210c8b
setitem exceptions for complex raise ValueError
MichaelTiemannOSC Jan 9, 2024
9f4bea5
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 16, 2024
be1f02b
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 23, 2024
b3edefa
Update _mixins.py
MichaelTiemannOSC Jan 23, 2024
89ea60b
Incorporate feedback
MichaelTiemannOSC Jan 31, 2024
4dc3bea
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Mar 22, 2024
4e273fa
Update test_sparse.py
MichaelTiemannOSC Mar 22, 2024
abfdedb
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Mar 29, 2024
59b50c9
Update algorithms.py
MichaelTiemannOSC Mar 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
return np.asarray(values)

elif is_complex_dtype(values.dtype):
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]"
# has no attribute "itemsize"
if values.dtype.itemsize == 16: # type: ignore[union-attr]
# We have support for complex128
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not complex64?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be happy to add complex64 to the test suite, but thought it better to do one case (complex128) at a time. I didn't want to add something (values.dtype.itemsize == 8) that wasn't tested by the test suite.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a deal-breaker, but I'd prefer to do them both in this pass, or at least leave a comment explaining why one is excluded.

return np.asarray(values)
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
return cast(np.ndarray, values)

# datetimelike
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ def _astype_nansafe(
elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
return _astype_float_to_int_nansafe(arr, dtype, copy)

elif np.issubdtype(arr.dtype, np.complexfloating) and is_object_dtype(dtype):
res = arr.astype(dtype, copy=copy)
res[np.isnan(arr)] = np.nan
return res

elif arr.dtype == object:
# if we have a datetime/timedelta array of objects
# then coerce to datetime64[ns] and use DatetimeArray.astype
Expand Down
34 changes: 24 additions & 10 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,16 +1004,30 @@ def nanvar(
values = values.copy()
np.putmask(values, mask, 0)

# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
sqr = _ensure_numeric((avg - values) ** 2)
if values.dtype.kind == "c":
MichaelTiemannOSC marked this conversation as resolved.
Show resolved Hide resolved
# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance...
# ...but also,
# see https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar
# which explains why computing the variance of complex numbers
# requires first normalizing the complex differences to magnitudes
avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
deltas = _ensure_numeric(avg - values)
avg_re = np.real(deltas)
avg_im = np.imag(deltas)
sqr = avg_re**2 + avg_im**2
else:
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
sqr = _ensure_numeric((avg - values) ** 2)

if mask is not None:
np.putmask(sqr, mask, 0)
result = sqr.sum(axis=axis, dtype=np.float64) / d
Expand Down
62 changes: 46 additions & 16 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,7 @@ def test_frame_operators_none_to_nan(self):
df = pd.DataFrame({"a": ["a", None, "b"]})
tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]}))

@pytest.mark.parametrize("dtype", ("float", "int64"))
@pytest.mark.parametrize("dtype", ("float", "int64", "complex128"))
def test_frame_operators_empty_like(self, dtype):
# Test for issue #10181
frames = [
Expand Down Expand Up @@ -1101,7 +1101,7 @@ def test_series_divmod_zero(self):
class TestUFuncCompat:
# TODO: add more dtypes
@pytest.mark.parametrize("holder", [Index, RangeIndex, Series])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
def test_ufunc_compat(self, holder, dtype):
box = Series if holder is Series else Index

Expand All @@ -1116,45 +1116,75 @@ def test_ufunc_compat(self, holder, dtype):
tm.assert_equal(result, expected)

# TODO: add more dtypes
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
def test_ufunc_coercions(self, index_or_series, dtype):
idx = index_or_series([1, 2, 3, 4, 5], dtype=dtype, name="x")
box = index_or_series

result = np.sqrt(idx)
assert result.dtype == "f8" and isinstance(result, box)
exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=np.float64)), name="x")
if result.dtype.kind == "c":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel is there something in our type conversion / introspection functions that lets us cast to the nearest inexact data type? If not that might be something we want to do here or in a follow up PR to better handle this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that lets us cast to the nearest inexact data type

I don't think so, no. I expected maybe_promote to do that, but it looks like it always gives float64.

assert result.dtype == dtype and isinstance(result, box)
exp_dtype = dtype
else:
assert result.dtype == "f8" and isinstance(result, box)
exp_dtype = np.float64
exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = np.divide(idx, 2.0)
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
if result.dtype.kind == "c":
assert result.dtype == dtype and isinstance(result, box)
exp_dtype = dtype
else:
assert result.dtype == "f8" and isinstance(result, box)
exp_dtype = np.float64
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

# _evaluate_numeric_binop
result = idx + 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.float64, name="x")
if result.dtype.kind == "c":
assert result.dtype == dtype and isinstance(result, box)
exp_dtype = dtype
else:
assert result.dtype == "f8" and isinstance(result, box)
exp_dtype = np.float64
MichaelTiemannOSC marked this conversation as resolved.
Show resolved Hide resolved
exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx - 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float64, name="x")
if result.dtype.kind == "c":
assert result.dtype == dtype and isinstance(result, box)
exp_dtype = dtype
else:
assert result.dtype == "f8" and isinstance(result, box)
exp_dtype = np.float64
exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx * 1.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64, name="x")
if result.dtype.kind == "c":
assert result.dtype == dtype and isinstance(result, box)
exp_dtype = dtype
else:
assert result.dtype == "f8" and isinstance(result, box)
exp_dtype = np.float64
exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx / 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
if result.dtype.kind == "c":
assert result.dtype == dtype and isinstance(result, box)
exp_dtype = dtype
else:
assert result.dtype == "f8" and isinstance(result, box)
exp_dtype = np.float64
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

Expand Down Expand Up @@ -1408,7 +1438,7 @@ def test_numeric_compat2_floordiv(self, idx, div, expected):
# __floordiv__
tm.assert_index_equal(idx // div, expected, exact=True)

@pytest.mark.parametrize("dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
@pytest.mark.parametrize("delta", [1, 0, -1])
def test_addsub_arithmetic(self, dtype, delta):
# GH#8142
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/base/dim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pandas.core.dtypes.common import (
is_bool_dtype,
is_complex_dtype,
is_integer_dtype,
)

Expand Down Expand Up @@ -272,6 +273,9 @@ def get_reduction_result_dtype(dtype):
data = data.astype("Float64")
if method == "mean":
tm.assert_extension_array_equal(result, data)
elif is_complex_dtype(data) and method in ["std", "var"]:
# std and var produce real-only results
tm.assert_extension_array_equal(result, data - data, check_dtype=False)
else:
tm.assert_extension_array_equal(result, data - data)

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/base/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
class BaseParsingTests:
@pytest.mark.parametrize("engine", ["c", "python"])
def test_EA_types(self, engine, data, request):
if engine == "c" and data.dtype.kind == "c":
request.node.add_marker(
pytest.mark.xfail(
reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}"
)
)
if isinstance(data.dtype, pd.CategoricalDtype):
# in parsers.pyx _convert_with_dtype there is special-casing for
# Categorical that pre-empts _from_sequence_of_strings
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,8 @@ def test_setitem_slice_array(self, data):

def test_setitem_scalar_key_sequence_raise(self, data):
arr = data[:5].copy()
with tm.external_error_raised(ValueError):
# complex128 data raises TypeError; other numeric types raise ValueError
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know where the ValueError is being thrown? I think the type of error should stay consistent

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When running pandas/tests/extension/test_numpy.py and data is an array of float64 we see:

ValueError: setting an array element with a sequence.

When data is complex128 we see:

TypeError: must be real number, not NumpyExtensionArray

The ValueError seems to come from Numpy, and the TypeError seems to come from Python, both coming from __setitem__ in class NDArrayBackedExtensionArray in pandas/core/arrays/_mixins.py.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we catch the Python TypeError and reraise as a ValueError?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've amended the PR to attempt this. Please let me know what you think.

with pytest.raises((ValueError, TypeError)):
arr[0] = arr[[0, 1]]

def test_setitem_preserves_views(self, data):
Expand Down Expand Up @@ -439,7 +440,7 @@ def test_setitem_invalid(self, data, invalid_scalar):
data[:] = invalid_scalar

def test_setitem_2d_values(self, data):
# GH50085
# GH54445
original = data.copy()
df = pd.DataFrame({"a": data, "b": data})
df.loc[[0, 1], :] = df.loc[[1, 0], :].values
Expand Down
43 changes: 39 additions & 4 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
orig_assert_attr_equal(attr, left, right, obj)


@pytest.fixture(params=["float", "object"])
@pytest.fixture(params=["complex", "float", "object"])
def dtype(request):
return NumpyEADtype(np.dtype(request.param))

Expand Down Expand Up @@ -77,7 +77,10 @@ def allow_in_pandas(monkeypatch):
def data(allow_in_pandas, dtype):
if dtype.numpy_dtype == "object":
return pd.Series([(i,) for i in range(100)]).array
return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype))
arr = np.arange(1, 101, dtype=dtype._dtype)
if dtype.kind == "c":
arr = arr + (arr * (0 + 1j))
return NumpyExtensionArray(arr)


@pytest.fixture
Expand Down Expand Up @@ -244,15 +247,15 @@ def test_insert_invalid(self, data, invalid_scalar):

def test_divmod(self, data):
divmod_exc = None
if data.dtype.kind == "O":
if data.dtype.kind in "Oc":
divmod_exc = TypeError
self.divmod_exc = divmod_exc
super().test_divmod(data)

def test_divmod_series_array(self, data):
ser = pd.Series(data)
exc = None
if data.dtype.kind == "O":
if data.dtype.kind in "Oc":
exc = TypeError
self.divmod_exc = exc
self._check_divmod_op(ser, divmod, data)
Expand All @@ -267,6 +270,13 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request)
)
request.node.add_marker(mark)
series_scalar_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
series_scalar_exc = TypeError
self.series_scalar_exc = series_scalar_exc
super().test_arith_series_with_scalar(data, all_arithmetic_operators)

Expand All @@ -275,6 +285,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
series_array_exc = None
if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
series_array_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
series_array_exc = TypeError
self.series_array_exc = series_array_exc
super().test_arith_series_with_array(data, all_arithmetic_operators)

Expand All @@ -288,6 +305,13 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
)
request.node.add_marker(mark)
frame_scalar_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
frame_scalar_exc = TypeError
self.frame_scalar_exc = frame_scalar_exc
super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

Expand Down Expand Up @@ -326,6 +350,17 @@ def test_fillna_frame(self, data_missing):
# Non-scalar "scalar" values.
super().test_fillna_frame(data_missing)

def test_fillna_no_op_returns_copy(self, data, request):
if data.dtype.kind == "c":
request.node.add_marker(
pytest.mark.xfail(
reason="no cython implementation of "
f"backfill(ndarray[{data.dtype.name}_t],"
f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd"
)
)
super().test_fillna_no_op_returns_copy(data)

@skip_nested
def test_setitem_invalid(self, data, invalid_scalar):
# object dtype can hold anything, so doesn't raise
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def test_fillna_limit_backfill(self, data_missing):
super().test_fillna_limit_backfill(data_missing)

def test_fillna_no_op_returns_copy(self, data, request):
# `data` never contains complex numbers in these tests
if np.isnan(data.fill_value):
request.applymarker(
pytest.mark.xfail(reason="returns array with different fill value")
Expand Down
Loading