Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable many complex number tests #54761

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
6125494
Enable many complex number tests
MichaelTiemannOSC Aug 25, 2023
e7a285a
Update v2.1.0.rst
MichaelTiemannOSC Aug 25, 2023
02719d9
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Aug 29, 2023
f9bfeb9
Fix merge error in test_decimal.py
MichaelTiemannOSC Aug 29, 2023
077213f
Simplify test_fillna_no_op_returns_copy
MichaelTiemannOSC Aug 29, 2023
9fda0ef
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Sep 8, 2023
d25baa2
changes from review
MichaelTiemannOSC Sep 8, 2023
ad841bf
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Sep 8, 2023
7535374
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Sep 22, 2023
7ef6052
Use LSP parameter style for request
MichaelTiemannOSC Sep 22, 2023
f1139f5
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 10, 2023
19d3127
Handle complex128 EA in _ensure_data
MichaelTiemannOSC Oct 11, 2023
67e2dbc
Fix mypy pre-commit problems
MichaelTiemannOSC Oct 12, 2023
909ced4
Remove some LSP sigs for _get_expected_exception
MichaelTiemannOSC Oct 13, 2023
48cb330
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 13, 2023
bc96021
Additional `requests` removed; indentation fix
MichaelTiemannOSC Oct 13, 2023
d98e6f0
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Oct 14, 2023
dabaf6f
Keep rval refs alive in StringHashTable._unique
MichaelTiemannOSC Oct 15, 2023
61c9b32
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Nov 4, 2023
6ed24ad
Code review changes
MichaelTiemannOSC Nov 4, 2023
e923878
Fix incomplete removal of `keep_rval_refs`
MichaelTiemannOSC Nov 4, 2023
5efad33
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Dec 9, 2023
51450c8
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Dec 9, 2023
c31b213
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
MichaelTiemannOSC Jan 5, 2024
9473130
Update io.py
MichaelTiemannOSC Jan 5, 2024
a86c896
Update test_numpy.py
MichaelTiemannOSC Jan 5, 2024
de56177
Update test_numpy.py
MichaelTiemannOSC Jan 5, 2024
198a16d
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 5, 2024
554a5c3
Update ops.py
MichaelTiemannOSC Jan 6, 2024
6ddb7f7
Update test_decimal.py
MichaelTiemannOSC Jan 6, 2024
c4a17a7
Further simplifications due to upstream
MichaelTiemannOSC Jan 6, 2024
040c98b
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
3a58f5a
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
29aa747
Update test_arrow.py
MichaelTiemannOSC Jan 6, 2024
5210c8b
setitem exceptions for complex raise ValueError
MichaelTiemannOSC Jan 9, 2024
9f4bea5
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 16, 2024
be1f02b
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Jan 23, 2024
b3edefa
Update _mixins.py
MichaelTiemannOSC Jan 23, 2024
89ea60b
Incorporate feedback
MichaelTiemannOSC Jan 31, 2024
4dc3bea
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Mar 22, 2024
4e273fa
Update test_sparse.py
MichaelTiemannOSC Mar 22, 2024
abfdedb
Merge branch 'main' into test_numpy_complex2
MichaelTiemannOSC Mar 29, 2024
59b50c9
Update algorithms.py
MichaelTiemannOSC Mar 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
return np.asarray(values)

elif is_complex_dtype(values.dtype):
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]"
# has no attribute "itemsize"
if values.dtype.itemsize in [32, 24, 16, 8]: # type: ignore[union-attr]
# The test suite tests support for complex128; we presume that
# complex64, complex192, and complex256 work as well
return np.asarray(values)
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
return cast(np.ndarray, values)

# datetimelike
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,13 @@ def shift(self, periods: int = 1, fill_value=None) -> Self:
def __setitem__(self, key, value) -> None:
    """Set ``self[key] = value`` on the backing ndarray.

    The key is normalized via ``check_array_indexer`` and the value via
    ``self._validate_setitem_value`` before assignment.

    Raises
    ------
    ValueError
        If the underlying ndarray rejects ``value``.  numpy's TypeError is
        translated to the ValueError that pandas callers expect.
    """
    key = check_array_indexer(self, key)
    value = self._validate_setitem_value(value)
    try:
        self._ndarray[key] = value
    except TypeError as exc:
        # Note: when `self._ndarray.dtype.kind == "c"`, numpy incorrectly
        # complains that `must be real number, not ...` when in reality
        # a complex argument is more likely what's expected.
        # Unpack exc.args so the ValueError carries the original message
        # text rather than the repr of a one-element tuple.
        raise ValueError(*exc.args) from exc
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the comment here is helpful, thanks. it suggests to me that we may want to only catch-and-re-raise in a subset of cases? otherwise we'll be re-raising as ValueError more often than we really want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously @mroeschke questioned whether this would ever be a TypeError in the first place — the above assignment was expected to only ever raise a ValueError. So, instead of trying to also handle TypeError in higher-level code, we translate this "impossible" case into the canonical form that Pandas expects. The comment helps the user understand a completely unhelpful error message that comes from Python. Previously I attempted to edit the error message to something more reasonable, but that was challenged. At the end of the day the question is: how much steering do we want to do for this case vs. just letting the exception raise in the expected way and letting users decipher what was wrong with their code in the first place.


def _validate_setitem_value(self, value):
    """Validate/coerce ``value`` before ``__setitem__`` writes it.

    The base implementation returns ``value`` unchanged; subclasses may
    override to perform dtype-specific validation or coercion.
    """
    return value
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ def _astype_nansafe(
elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
return _astype_float_to_int_nansafe(arr, dtype, copy)

elif np.issubdtype(arr.dtype, np.complexfloating) and is_object_dtype(dtype):
res = arr.astype(dtype, copy=copy)
res[np.isnan(arr)] = np.nan
return res

elif arr.dtype == object:
# if we have a datetime/timedelta array of objects
# then coerce to datetime64[ns] and use DatetimeArray.astype
Expand Down
34 changes: 24 additions & 10 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,16 +993,30 @@ def nanvar(
values = values.copy()
np.putmask(values, mask, 0)

# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
sqr = _ensure_numeric((avg - values) ** 2)
if values.dtype.kind == "c":
MichaelTiemannOSC marked this conversation as resolved.
Show resolved Hide resolved
# xref GH10242
# Compute variance via two-pass algorithm, which is stable against
# cancellation errors and relatively accurate for small numbers of
# observations.
#
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance...
# ...but also,
# see https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar
# which explains why computing the variance of complex numbers
# requires first normalizing the complex differences to magnitudes
avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
deltas = _ensure_numeric(avg - values)
avg_re = np.real(deltas)
avg_im = np.imag(deltas)
sqr = avg_re**2 + avg_im**2
else:
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
if axis is not None:
avg = np.expand_dims(avg, axis)
sqr = _ensure_numeric((avg - values) ** 2)

if mask is not None:
np.putmask(sqr, mask, 0)
result = sqr.sum(axis=axis, dtype=np.float64) / d
Expand Down
62 changes: 46 additions & 16 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,7 @@ def test_frame_operators_none_to_nan(self):
df = pd.DataFrame({"a": ["a", None, "b"]})
tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]}))

@pytest.mark.parametrize("dtype", ("float", "int64"))
@pytest.mark.parametrize("dtype", ("float", "int64", "complex128"))
def test_frame_operators_empty_like(self, dtype):
# Test for issue #10181
frames = [
Expand Down Expand Up @@ -1101,7 +1101,7 @@ def test_series_divmod_zero(self):
class TestUFuncCompat:
# TODO: add more dtypes
@pytest.mark.parametrize("holder", [Index, RangeIndex, Series])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
def test_ufunc_compat(self, holder, dtype):
box = Series if holder is Series else Index

Expand All @@ -1116,45 +1116,75 @@ def test_ufunc_compat(self, holder, dtype):
tm.assert_equal(result, expected)

# TODO: add more dtypes
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
def test_ufunc_coercions(self, index_or_series, dtype):
idx = index_or_series([1, 2, 3, 4, 5], dtype=dtype, name="x")
box = index_or_series

result = np.sqrt(idx)
assert result.dtype == "f8" and isinstance(result, box)
exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=np.float64)), name="x")
assert isinstance(result, box)
if result.dtype.kind == "c":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel is there something in our type conversion / introspection functions that lets us cast to the nearest inexact data type? If not that might be something we want to do here or in a follow up PR to better handle this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that lets us cast to the nearest inexact data type

I don't think so, no. i expected maybe_promote to do that, but looks like it always gives float64

exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = np.divide(idx, 2.0)
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
assert isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

# _evaluate_numeric_binop
result = idx + 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
MichaelTiemannOSC marked this conversation as resolved.
Show resolved Hide resolved
exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx - 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx * 1.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

result = idx / 2.0
assert result.dtype == "f8" and isinstance(result, box)
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
isinstance(result, box)
if result.dtype.kind == "c":
exp_dtype = dtype
else:
# assert result.dtype == "f8"
exp_dtype = np.float64
exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
exp = tm.box_expected(exp, box)
tm.assert_equal(result, exp)

Expand Down Expand Up @@ -1408,7 +1438,7 @@ def test_numeric_compat2_floordiv(self, idx, div, expected):
# __floordiv__
tm.assert_index_equal(idx // div, expected, exact=True)

@pytest.mark.parametrize("dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
@pytest.mark.parametrize("delta", [1, 0, -1])
def test_addsub_arithmetic(self, dtype, delta):
# GH#8142
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/base/dim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pandas.core.dtypes.common import (
is_bool_dtype,
is_complex_dtype,
is_integer_dtype,
)

Expand Down Expand Up @@ -273,6 +274,9 @@ def get_reduction_result_dtype(dtype):
data = data.astype("Float64")
if method == "mean":
tm.assert_extension_array_equal(result, data)
elif is_complex_dtype(data) and method in ["std", "var"]:
# std and var produce real-only results
tm.assert_extension_array_equal(result, data - data, check_dtype=False)
else:
tm.assert_extension_array_equal(result, data - data)

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/base/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
class BaseParsingTests:
@pytest.mark.parametrize("engine", ["c", "python"])
def test_EA_types(self, engine, data, request):
if engine == "c" and data.dtype.kind == "c":
request.node.add_marker(
pytest.mark.xfail(
reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}"
)
)
if isinstance(data.dtype, pd.CategoricalDtype):
# in parsers.pyx _convert_with_dtype there is special-casing for
# Categorical that pre-empts _from_sequence_of_strings
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ def test_setitem_slice_array(self, data):

def test_setitem_scalar_key_sequence_raise(self, data):
    # Assigning a length-2 selection into a single scalar slot must fail.
    subset = data[:5].copy()
    # error messages vary by subclass, so match the empty pattern only
    with pytest.raises(ValueError, match=""):
        subset[0] = subset[[0, 1]]

def test_setitem_preserves_views(self, data):
Expand Down Expand Up @@ -432,7 +433,7 @@ def test_setitem_invalid(self, data, invalid_scalar):
data[:] = invalid_scalar

def test_setitem_2d_values(self, data):
# GH50085
# GH54445
original = data.copy()
df = pd.DataFrame({"a": data, "b": data})
df.loc[[0, 1], :] = df.loc[[1, 0], :].values
Expand Down
43 changes: 39 additions & 4 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
orig_assert_attr_equal(attr, left, right, obj)


@pytest.fixture(params=["float", "object"])
@pytest.fixture(params=["complex", "float", "object"])
def dtype(request):
return NumpyEADtype(np.dtype(request.param))

Expand Down Expand Up @@ -78,7 +78,10 @@ def allow_in_pandas(monkeypatch):
def data(allow_in_pandas, dtype):
if dtype.numpy_dtype == "object":
return pd.Series([(i,) for i in range(100)]).array
return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype))
arr = np.arange(1, 101, dtype=dtype._dtype)
if dtype.kind == "c":
arr = arr + (arr * (0 + 1j))
return NumpyExtensionArray(arr)


@pytest.fixture
Expand Down Expand Up @@ -245,15 +248,15 @@ def test_insert_invalid(self, data, invalid_scalar):

def test_divmod(self, data):
    # divmod is not defined for object or complex dtypes -> TypeError
    self.divmod_exc = TypeError if data.dtype.kind in "Oc" else None
    super().test_divmod(data)

def test_divmod_series_array(self, data):
    ser = pd.Series(data)
    # divmod is not defined for object or complex dtypes -> TypeError
    self.divmod_exc = TypeError if data.dtype.kind in "Oc" else None
    self._check_divmod_op(ser, divmod, data)
Expand All @@ -268,6 +271,13 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request)
)
request.node.add_marker(mark)
series_scalar_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
series_scalar_exc = TypeError
self.series_scalar_exc = series_scalar_exc
super().test_arith_series_with_scalar(data, all_arithmetic_operators)

Expand All @@ -276,6 +286,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
series_array_exc = None
if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
series_array_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
series_array_exc = TypeError
self.series_array_exc = series_array_exc
super().test_arith_series_with_array(data, all_arithmetic_operators)

Expand All @@ -289,6 +306,13 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
)
request.node.add_marker(mark)
frame_scalar_exc = TypeError
elif data.dtype.kind == "c" and opname in [
"__floordiv__",
"__rfloordiv__",
"__mod__",
"__rmod__",
]:
frame_scalar_exc = TypeError
self.frame_scalar_exc = frame_scalar_exc
super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

Expand Down Expand Up @@ -328,6 +352,17 @@ def test_fillna_frame(self, data_missing):
# Non-scalar "scalar" values.
super().test_fillna_frame(data_missing)

def test_fillna_no_op_returns_copy(self, data, request):
    # complex dtypes lack a cython fill implementation, so expect failure
    if data.dtype.kind == "c":
        mark = pytest.mark.xfail(
            reason="no cython implementation of "
            f"backfill(ndarray[{data.dtype.name}_t],"
            f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd"
        )
        request.node.add_marker(mark)
    super().test_fillna_no_op_returns_copy(data)

@skip_nested
def test_setitem_invalid(self, data, invalid_scalar):
# object dtype can hold anything, so doesn't raise
Expand Down