diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6ccb8dc5d2725..5f46d27fe243a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,6 +27,7 @@ from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna +from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import factorize, take, take_1d, unique1d @@ -53,14 +54,14 @@ def _cat_compare_op(op): + + @ops.unpack_and_defer def f(self, other): # On python2, you can usually compare any type to any type, and # Categoricals can be seen as a custom type, but having different # results depending whether categories are the same or not is kind of # insane, so be a bit stricter here and use the python3 idea of # comparing only things of equal type. - if isinstance(other, ABCSeries): - return NotImplemented if not self.ordered: if op in ['__lt__', '__gt__', '__le__', '__ge__']: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index fa6941476522d..ceb3bd2503204 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,6 +36,8 @@ def _make_comparison_op(cls, op): # TODO: share code with indexes.base version? Main difference is that # the block for MultiIndex was removed here. + + # @ops.unpack_and_defer def cmp_method(self, other): if isinstance(other, ABCDataFrame): return NotImplemented @@ -885,6 +887,7 @@ def _time_shift(self, periods, freq=None): return self._generate_range(start=start, end=end, periods=None, freq=self.freq) + # @ops.unpack_and_defer def __add__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): @@ -946,6 +949,7 @@ def __radd__(self, other): # alias for __add__ return self.__add__(other) + # @ops.unpack_and_defer def __sub__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 89d82f1ed2042..3e2d8820c2904 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -93,6 +93,7 @@ def _dt_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False + # @ops.unpack_and_defer def wrapper(self, other): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 38dc68e8f77a3..facff9e0934b7 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, notna -from pandas.core import nanops +from pandas.core import nanops, ops from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin @@ -483,25 +483,17 @@ def _values_for_argsort(self): @classmethod def _create_comparison_method(cls, op): + + # TODO: needs tests for deferring to DataFrame + @ops.unpack_and_defer def cmp_method(self, other): op_name = op.__name__ mask = None - if isinstance(other, (ABCSeries, ABCIndexClass)): - # Rely on pandas to unbox and dispatch to us. - return NotImplemented - if isinstance(other, IntegerArray): other, mask = other._data, other._mask - elif is_list_like(other): - other = np.asarray(other) - if other.ndim > 0 and len(self) != len(other): - raise ValueError('Lengths must match to compare') - - other = lib.item_from_zerodim(other) - # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future with warnings.catch_warnings(): @@ -573,6 +565,8 @@ def _maybe_mask_result(self, result, mask, other, op_name): @classmethod def _create_arithmetic_method(cls, op): + + # @ops.unpack_and_defer def integer_arithmetic_method(self, other): op_name = op.__name__ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 178efbe6f8376..12df91818f762 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -51,6 +51,7 @@ def _period_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False + # @ops.unpack_and_defer def wrapper(self, other): op = getattr(self.asi8, opname) # We want to eventually defer to the Series or PeriodIndex (which will diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 9e1d2efc21b81..ba2fef5f5bb73 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -31,6 +31,7 @@ ABCIndexClass, ABCSeries, ABCSparseSeries) from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin @@ -1650,13 +1651,12 @@ def sparse_unary_method(self): @classmethod def _create_arithmetic_method(cls, op): + + # TODO: needs test for deferring to DataFrame, op with 0-dim + @ops.unpack_and_defer def sparse_arithmetic_method(self, other): op_name = op.__name__ - if isinstance(other, (ABCSeries, ABCIndexClass)): - # Rely on pandas to dispatch to us. - return NotImplemented - if isinstance(other, SparseArray): return _sparse_array_op(self, other, op, op_name) @@ -1678,10 +1678,6 @@ def sparse_arithmetic_method(self, other): with np.errstate(all='ignore'): # TODO: delete sparse stuff in core/ops.py # TODO: look into _wrap_result - if len(self) != len(other): - raise AssertionError( - ("length mismatch: {self} vs. {other}".format( - self=len(self), other=len(other)))) if not isinstance(other, SparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, @@ -1693,26 +1689,21 @@ def sparse_arithmetic_method(self, other): @classmethod def _create_comparison_method(cls, op): + + # TODO: needs test for deferring to DataFrame, op with 0-dim + @ops.unpack_and_defer def cmp_method(self, other): op_name = op.__name__ if op_name in {'and_', 'or_'}: op_name = op_name[:-1] - if isinstance(other, (ABCSeries, ABCIndexClass)): - # Rely on pandas to unbox and dispatch to us. - return NotImplemented - if not is_scalar(other) and not isinstance(other, type(self)): # convert list-like to ndarray other = np.asarray(other) if isinstance(other, np.ndarray): # TODO: make this more flexible than just ndarray... - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. {other}" - .format(self=len(self), - other=len(other))) other = SparseArray(other, fill_value=self.fill_value) if isinstance(other, SparseArray): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a5d074df338ee..3b2f561f6256a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -19,8 +19,7 @@ is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) +from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex from pandas.core.dtypes.missing import isna from pandas.core import ops @@ -74,6 +73,7 @@ def _td_array_cmp(cls, op): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) + # @ops.unpack_and_defer def wrapper(self, other): if _is_convertible_to_td(other) or other is NaT: try: @@ -302,11 +302,8 @@ def _addsub_offset_array(self, other, op): raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" .format(cls=type(self).__name__)) + @ops.unpack_and_defer def __mul__(self, other): - other = lib.item_from_zerodim(other) - - if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)): - return NotImplemented if is_scalar(other): # numpy will accept float and int, raise TypeError for others @@ -316,14 +313,6 @@ def __mul__(self, other): freq = self.freq * other return type(self)(result, freq=freq) - if not hasattr(other, "dtype"): - # list, tuple - other = np.array(other) - if len(other) != len(self) and not is_timedelta64_dtype(other): - # Exclude timedelta64 here so we correctly raise TypeError - # for that instead of ValueError - raise ValueError("Cannot multiply with unequal lengths") - if is_object_dtype(other): # this multiplication will succeed only if all elements of other # are int or float scalars, so we will end up with @@ -338,12 +327,9 @@ def __mul__(self, other): __rmul__ = __mul__ + @ops.unpack_and_defer def __truediv__(self, other): # timedelta / X is well-defined for timedelta-like or numeric X - other = lib.item_from_zerodim(other) - - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -365,14 +351,7 @@ def __truediv__(self, other): freq = self.freq.delta / other return type(self)(result, freq=freq) - if not hasattr(other, "dtype"): - # e.g. list, tuple - other = np.array(other) - - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): # let numpy handle it return self._data / other @@ -388,12 +367,9 @@ def __truediv__(self, other): result = self._data / other return type(self)(result) + @ops.unpack_and_defer def __rtruediv__(self, other): # X / timedelta is defined only for timedelta-like X - other = lib.item_from_zerodim(other) - - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -411,14 +387,7 @@ def __rtruediv__(self, other): .format(typ=type(other).__name__, cls=type(self).__name__)) - if not hasattr(other, "dtype"): - # e.g. list, tuple - other = np.array(other) - - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): # let numpy handle it return other / self._data @@ -438,11 +407,9 @@ def __rtruediv__(self, other): __div__ = __truediv__ __rdiv__ = __rtruediv__ + @ops.unpack_and_defer def __floordiv__(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - other = lib.item_from_zerodim(other) if is_scalar(other): if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -466,13 +433,7 @@ def __floordiv__(self, other): freq = self.freq / other return type(self)(result.view('m8[ns]'), freq=freq) - if not hasattr(other, "dtype"): - # list, tuple - other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -501,11 +462,9 @@ def __floordiv__(self, other): raise TypeError("Cannot divide {typ} by {cls}" .format(typ=dtype, cls=type(self).__name__)) + @ops.unpack_and_defer def __rfloordiv__(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - other = lib.item_from_zerodim(other) if is_scalar(other): if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -523,13 +482,7 @@ def __rfloordiv__(self, other): .format(typ=type(other).__name__, cls=type(self).__name__)) - if not hasattr(other, "dtype"): - # list, tuple - other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -551,32 +504,23 @@ def __rfloordiv__(self, other): raise TypeError("Cannot divide {typ} by {cls}" .format(typ=dtype, cls=type(self).__name__)) + @ops.unpack_and_defer def __mod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) return self - (self // other) * other + @ops.unpack_and_defer def __rmod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) return other - (other // self) * self + @ops.unpack_and_defer def __divmod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -584,12 +528,9 @@ def __divmod__(self, other): res2 = self - res1 * other return res1, res2 + @ops.unpack_and_defer def __rdivmod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 63c0827cccb82..345b057c19aa9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -63,6 +63,8 @@ def _try_get_item(x): def _make_comparison_op(op, cls): + + # @ops.unpack_and_defer def cmp_method(self, other): if isinstance(other, (np.ndarray, Index, ABCSeries)): if other.ndim > 0 and len(self) != len(other): @@ -98,10 +100,9 @@ def cmp_method(self, other): def _make_arithmetic_op(op, cls): + @ops.unpack_and_defer def index_arithmetic_method(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - elif isinstance(other, ABCTimedeltaIndex): + if isinstance(other, ABCTimedeltaIndex): # Defer to subclass implementation return NotImplemented elif (isinstance(other, (np.ndarray, ABCTimedeltaArray)) and @@ -2201,6 +2202,7 @@ def _get_unique_index(self, dropna=False): # -------------------------------------------------------------------- # Arithmetic & Logical Methods + # @ops.unpack_and_defer def __add__(self, other): if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0da924de244ed..030bbab06a244 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,8 +14,7 @@ from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype) -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCSeries, ABCTimedeltaIndex) +from pandas.core.dtypes.generic import ABCTimedeltaIndex from pandas.core import ops import pandas.core.common as com @@ -576,10 +575,8 @@ def __getitem__(self, key): # fall back to Int64Index return super_getitem(key) + @ops.unpack_and_defer def __floordiv__(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - if is_integer(other) and other != 0: if (len(self) == 0 or self._start % other == 0 and @@ -610,10 +607,9 @@ def _make_evaluate_binop(op, step=False): if False, use the existing step """ + @ops.unpack_and_defer def _evaluate_numeric_binop(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - elif isinstance(other, ABCTimedeltaIndex): + if isinstance(other, ABCTimedeltaIndex): # Defer to TimedeltaIndex implementation return NotImplemented elif isinstance(other, (timedelta, np.timedelta64)): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 41e3f4581587e..9b83faa30b2fc 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -7,6 +7,7 @@ from __future__ import division import datetime +from functools import wraps import operator import textwrap import warnings @@ -136,6 +137,49 @@ def maybe_upcast_for_op(obj): return obj +def unpack_and_defer(func): + """ + Decorate a binary method to return NotImplemented where appropriate, + unpack arguments, and validate argument shapes. + + Parameters + ---------- + func : binary method + + Returns + ------- + decorated : binary method + """ + + @wraps(func) + def method(self, other): + other = lib.item_from_zerodim(other) + # TODO: maybe_upcast_for_op? + + for abc_cls in [ABCDataFrame, ABCSeries, ABCIndexClass]: + if isinstance(self, abc_cls): + break + elif isinstance(other, abc_cls): + return NotImplemented + + if is_list_like(other): + if not hasattr(other, 'dtype'): + # list, tuple... + if np.ndim(other) == 0: + # i.e. generator + other = list(other) + other = np.array(other) + + if not hasattr(self, 'index') and len(other) != len(self): + # Series and DataFrame may have alignment calls after this, + # so we cannot do a length check yet + raise ValueError("Lengths must match") + + return func(self, other) + + return method + + # ----------------------------------------------------------------------------- # Reversed Operations not available in the stdlib operator module. # Defining these instead of using lambdas allows us to reference them by name. @@ -1526,6 +1570,7 @@ def safe_na_op(lvalues, rvalues): lambda x: op(x, rvalues)) raise + # @unpack_and_defer def wrapper(left, right): if isinstance(right, ABCDataFrame): return NotImplemented @@ -1574,6 +1619,7 @@ def wrapper(left, right): return construct_result(left, result, index=left.index, name=res_name, dtype=None) + wrapper.__name__ = op_name return wrapper @@ -1646,6 +1692,7 @@ def na_op(x, y): return result + # @unpack_and_defer def wrapper(self, other, axis=None): # Validate the axis parameter if axis is not None: @@ -1802,6 +1849,7 @@ def na_op(x, y): fill_int = lambda x: x.fillna(0) fill_bool = lambda x: x.fillna(False).astype(bool) + # @unpack_and_defer def wrapper(self, other): is_self_int_dtype = is_integer_dtype(self.dtype) @@ -2232,10 +2280,9 @@ def _arith_method_SPARSE_SERIES(cls, op, special): """ op_name = _get_op_name(op, special) + @unpack_and_defer def wrapper(self, other): - if isinstance(other, ABCDataFrame): - return NotImplemented - elif isinstance(other, ABCSeries): + if isinstance(other, ABCSeries): if not isinstance(other, ABCSparseSeries): other = other.to_sparse(fill_value=self.fill_value) return _sparse_series_op(self, other, op, op_name) @@ -2271,13 +2318,11 @@ def _arith_method_SPARSE_ARRAY(cls, op, special): """ op_name = _get_op_name(op, special) + @unpack_and_defer def wrapper(self, other): from pandas.core.arrays.sparse.array import ( SparseArray, _sparse_array_op, _wrap_result, _get_fill) if isinstance(other, np.ndarray): - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. {other}" - .format(self=len(self), other=len(other))) if not isinstance(other, SparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 5404d3f5f1915..074a7c5ffc24c 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1409,8 +1409,16 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box_with_array) - with pytest.raises(TypeError): + + # For TimedeltaArray and TimedeltaIndex, the length check occurs + # before a TypeError can occur. For Series and DataFrame, + # idx[:3] gets reindexed to match idx, so a TypeError is raised + err = ValueError + if box_with_array in [pd.Series, pd.DataFrame]: + err = TypeError + with pytest.raises(err): idx * idx[:3] + with pytest.raises(ValueError): idx * np.array([1, 2]) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index f216865faa2ad..2dc2d12f4d2dd 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import operator import numpy as np import pytest @@ -119,9 +120,34 @@ def f(): res = cat_rev > "b" tm.assert_numpy_array_equal(res, exp) + # check that zero-dim array gets unboxed + res = cat_rev > np.array("b") + tm.assert_numpy_array_equal(res, exp) + class TestCategoricalOps(object): + def test_compare_frame(self): + # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame + data = ["a", "b", 2, "a"] + cat = Categorical(data) + + df = DataFrame(cat) + + for op in [operator.eq, operator.ne, operator.ge, + operator.gt, operator.le, operator.lt]: + with pytest.raises(ValueError): + # alignment raises unless we transpose + op(cat, df) + + result = cat == df.T + expected = DataFrame([[True, True, True, True]]) + tm.assert_frame_equal(result, expected) + + result = cat[::-1] != df.T + expected = DataFrame([[False, True, True, False]]) + tm.assert_frame_equal(result, expected) + def test_datetime_categorical_comparison(self): dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True) tm.assert_numpy_array_equal(dt_cat > dt_cat[0],