From 3782dd17c37df76c75de14b12b5afa88f812199c Mon Sep 17 00:00:00 2001 From: mutricyl <118692416+mutricyl@users.noreply.github.com> Date: Mon, 1 Jul 2024 21:15:24 +0200 Subject: [PATCH] remove ops div class to solve #21374 (#59144) * remove core.computation.ops.Div resolves #21374 #58748 * need to preserve order * updating tests * update whatsnew * solve mypy issue * fixing pytests * better than cast * adding specific test * Update pandas/tests/frame/test_query_eval.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> * Update pandas/tests/computation/test_eval.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --------- Co-authored-by: Laurent Mutricy Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_testing/__init__.py | 1 + pandas/conftest.py | 15 ++++++++ pandas/core/computation/expr.py | 6 +--- pandas/core/computation/ops.py | 52 --------------------------- pandas/tests/computation/test_eval.py | 23 ++++++++---- pandas/tests/frame/test_query_eval.py | 16 +++++++-- 7 files changed, 48 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index afb2f91f65ccd..a94d0588a081d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -616,6 +616,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) +- Bug in :func:`eval` on :class:`complex` including division ``/`` discards imaginary part. (:issue:`21374`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index fb8ca8aad3428..1cd91ee5b120c 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -107,6 +107,7 @@ COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"] STRING_DTYPES: list[Dtype] = [str, "str", "U"] +COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES] DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"] TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"] diff --git a/pandas/conftest.py b/pandas/conftest.py index c3bfc8c06ad8a..70e729dfb98a4 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1448,6 +1448,21 @@ def complex_dtype(request): return request.param +@pytest.fixture(params=tm.COMPLEX_FLOAT_DTYPES) +def complex_or_float_dtype(request): + """ + Parameterized fixture for complex and numpy float dtypes. + + * complex + * 'complex64' + * 'complex128' + * float + * 'float32' + * 'float64' + """ + return request.param + + @pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) def any_signed_int_numpy_dtype(request): """ diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index b287cd542068d..b074e768e0842 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -32,7 +32,6 @@ UNARY_OPS_SYMS, BinOp, Constant, - Div, FuncNode, Op, Term, @@ -374,7 +373,7 @@ class BaseExprVisitor(ast.NodeVisitor): "Add", "Sub", "Mult", - None, + "Div", "Pow", "FloorDiv", "Mod", @@ -537,9 +536,6 @@ def visit_BinOp(self, node, **kwargs): left, right = self._maybe_downcast_constants(left, right) return self._maybe_evaluate_binop(op, op_class, left, right) - def visit_Div(self, node, **kwargs): - return lambda lhs, rhs: Div(lhs, rhs) - def visit_UnaryOp(self, node, **kwargs): op = self.visit(node.op) operand = self.visit(node.operand) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 056325fd2e4ab..a1a5f77f8539e 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -18,7 +18,6 @@ from pandas.core.dtypes.common import ( is_list_like, - is_numeric_dtype, is_scalar, ) @@ -328,31 +327,6 @@ def _not_in(x, y): _binary_ops_dict.update(d) -def _cast_inplace(terms, acceptable_dtypes, dtype) -> None: - """ - Cast an expression inplace. - - Parameters - ---------- - terms : Op - The expression that should cast. - acceptable_dtypes : list of acceptable numpy.dtype - Will not cast if term's dtype in this list. - dtype : str or numpy.dtype - The dtype to cast to. - """ - dt = np.dtype(dtype) - for term in terms: - if term.type in acceptable_dtypes: - continue - - try: - new_value = term.value.astype(dt) - except AttributeError: - new_value = dt.type(term.value) - term.update(new_value) - - def is_term(obj) -> bool: return isinstance(obj, Term) @@ -509,32 +483,6 @@ def _disallow_scalar_only_bool_ops(self) -> None: raise NotImplementedError("cannot evaluate scalar only bool ops") -class Div(BinOp): - """ - Div operator to special case casting. - - Parameters - ---------- - lhs, rhs : Term or Op - The Terms or Ops in the ``/`` expression. - """ - - def __init__(self, lhs, rhs) -> None: - super().__init__("/", lhs, rhs) - - if not is_numeric_dtype(lhs.return_type) or not is_numeric_dtype( - rhs.return_type - ): - raise TypeError( - f"unsupported operand type(s) for {self.op}: " - f"'{lhs.return_type}' and '{rhs.return_type}'" - ) - - # do not upcast float32s to float64 un-necessarily - acceptable_dtypes = [np.float32, np.float64] - _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64) - - UNARY_OPS_SYMS = ("+", "-", "~", "not") _unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) _unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs)) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index d52f33fe80434..1844b47847e95 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -758,16 +758,25 @@ class TestTypeCasting: # maybe someday... numexpr has too many upcasting rules now # chain(*(np.core.sctypes[x] for x in ['uint', 'int', 'float'])) @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) - def test_binop_typecasting(self, engine, parser, op, float_numpy_dtype, left_right): - df = DataFrame( - np.random.default_rng(2).standard_normal((5, 3)), dtype=float_numpy_dtype - ) + def test_binop_typecasting( + self, engine, parser, op, complex_or_float_dtype, left_right, request + ): + # GH#21374 + dtype = complex_or_float_dtype + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dtype) left, right = left_right s = f"{left} {op} {right}" res = pd.eval(s, engine=engine, parser=parser) - assert df.values.dtype == float_numpy_dtype - assert res.values.dtype == float_numpy_dtype - tm.assert_frame_equal(res, eval(s)) + if dtype == "complex64" and engine == "numexpr": + mark = pytest.mark.xfail( + reason="numexpr issue with complex that are upcast " + "to complex 128 " + "https://github.com/pydata/numexpr/issues/492" + ) + request.applymarker(mark) + assert df.values.dtype == dtype + assert res.values.dtype == dtype + tm.assert_frame_equal(res, eval(s), check_exact=False) # ------------------------------------- diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index ff1bf5632e920..c9ea5f379f1e9 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -202,11 +202,23 @@ def test_eval_simple(self, engine, parser): expected = df["a"] tm.assert_series_equal(expected, res) - def test_extension_array_eval(self, engine, parser): + def test_extension_array_eval(self, engine, parser, request): # GH#58748 + if engine == "numexpr": + mark = pytest.mark.xfail( + reason="numexpr does not support extension array dtypes" + ) + request.applymarker(mark) df = DataFrame({"a": pd.array([1, 2, 3]), "b": pd.array([4, 5, 6])}) result = df.eval("a / b", engine=engine, parser=parser) - expected = Series([0.25, 0.40, 0.50]) + expected = Series(pd.array([0.25, 0.40, 0.50])) + tm.assert_series_equal(result, expected) + + def test_complex_eval(self, engine, parser): + # GH#21374 + df = DataFrame({"a": [1 + 2j], "b": [1 + 1j]}) + result = df.eval("a/b", engine=engine, parser=parser) + expected = Series([1.5 + 0.5j]) tm.assert_series_equal(result, expected)