From c73f7141987f9486826055a482236b36c3aeb859 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 13 Dec 2018 21:34:07 -0800 Subject: [PATCH 1/9] implement unpack_and_defer --- pandas/core/arrays/datetimes.py | 2 - pandas/core/arrays/sparse.py | 21 ++--- pandas/core/arrays/timedeltas.py | 97 ++++++--------------- pandas/core/indexes/base.py | 8 +- pandas/core/indexes/range.py | 9 +- pandas/core/ops.py | 57 ++++++++++-- pandas/tests/arithmetic/test_timedelta64.py | 4 +- 7 files changed, 96 insertions(+), 102 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4849ee1e3e665..744e90caeb225 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -489,8 +489,6 @@ def _assert_tzawareness_compat(self, other): def _sub_datetime_arraylike(self, other): """subtract DatetimeArray/Index or ndarray[datetime64]""" - if len(self) != len(other): - raise ValueError("cannot add indices of unequal length") if isinstance(other, np.ndarray): assert is_datetime64_dtype(other) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 9e1d2efc21b81..c8f10802fc7ff 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -36,6 +36,7 @@ from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.base import PandasObject import pandas.core.common as com +from pandas.core import ops from pandas.core.missing import interpolate_2d import pandas.io.formats.printing as printing @@ -1650,13 +1651,11 @@ def sparse_unary_method(self): @classmethod def _create_arithmetic_method(cls, op): + + @ops.unpack_and_defer def sparse_arithmetic_method(self, other): op_name = op.__name__ - if isinstance(other, (ABCSeries, ABCIndexClass)): - # Rely on pandas to dispatch to us. - return NotImplemented - if isinstance(other, SparseArray): return _sparse_array_op(self, other, op, op_name) @@ -1678,10 +1677,6 @@ def sparse_arithmetic_method(self, other): with np.errstate(all='ignore'): # TODO: delete sparse stuff in core/ops.py # TODO: look into _wrap_result - if len(self) != len(other): - raise AssertionError( - ("length mismatch: {self} vs. {other}".format( - self=len(self), other=len(other)))) if not isinstance(other, SparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, @@ -1693,26 +1688,20 @@ def sparse_arithmetic_method(self, other): @classmethod def _create_comparison_method(cls, op): + + @ops.unpack_and_defer def cmp_method(self, other): op_name = op.__name__ if op_name in {'and_', 'or_'}: op_name = op_name[:-1] - if isinstance(other, (ABCSeries, ABCIndexClass)): - # Rely on pandas to unbox and dispatch to us. - return NotImplemented - if not is_scalar(other) and not isinstance(other, type(self)): # convert list-like to ndarray other = np.asarray(other) if isinstance(other, np.ndarray): # TODO: make this more flexible than just ndarray... - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. {other}" - .format(self=len(self), - other=len(other))) other = SparseArray(other, fill_value=self.fill_value) if isinstance(other, SparseArray): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9b7e1986e4831..95cd935260101 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -307,11 +307,8 @@ def _addsub_offset_array(self, other, op): raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" .format(cls=type(self).__name__)) + @ops.unpack_and_defer def __mul__(self, other): - other = lib.item_from_zerodim(other) - - if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)): - return NotImplemented if is_scalar(other): # numpy will accept float and int, raise TypeError for others @@ -324,10 +321,6 @@ def __mul__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self) and not is_timedelta64_dtype(other): - # Exclude timedelta64 here so we correctly raise TypeError - # for that instead of ValueError - raise ValueError("Cannot multiply with unequal lengths") if is_object_dtype(other): # this multiplication will succeed only if all elements of other @@ -343,12 +336,9 @@ def __mul__(self, other): __rmul__ = __mul__ + @ops.unpack_and_defer def __truediv__(self, other): # timedelta / X is well-defined for timedelta-like or numeric X - other = lib.item_from_zerodim(other) - - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -370,14 +360,7 @@ def __truediv__(self, other): freq = self.freq.delta / other return type(self)(result, freq=freq) - if not hasattr(other, "dtype"): - # e.g. list, tuple - other = np.array(other) - - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): # let numpy handle it return self._data / other @@ -393,12 +376,9 @@ def __truediv__(self, other): result = self._data / other return type(self)(result) + @ops.unpack_and_defer def __rtruediv__(self, other): # X / timedelta is defined only for timedelta-like X - other = lib.item_from_zerodim(other) - - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -416,14 +396,7 @@ def __rtruediv__(self, other): .format(typ=type(other).__name__, cls=type(self).__name__)) - if not hasattr(other, "dtype"): - # e.g. list, tuple - other = np.array(other) - - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): # let numpy handle it return other / self._data @@ -443,11 +416,9 @@ def __rtruediv__(self, other): __div__ = __truediv__ __rdiv__ = __rtruediv__ + @ops.unpack_and_defer def __floordiv__(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - other = lib.item_from_zerodim(other) if is_scalar(other): if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -471,13 +442,7 @@ def __floordiv__(self, other): freq = self.freq / other return type(self)(result.view('m8[ns]'), freq=freq) - if not hasattr(other, "dtype"): - # list, tuple - other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -506,11 +471,9 @@ def __floordiv__(self, other): raise TypeError("Cannot divide {typ} by {cls}" .format(typ=dtype, cls=type(self).__name__)) + @ops.unpack_and_defer def __rfloordiv__(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - other = lib.item_from_zerodim(other) if is_scalar(other): if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) @@ -528,13 +491,7 @@ def __rfloordiv__(self, other): .format(typ=type(other).__name__, cls=type(self).__name__)) - if not hasattr(other, "dtype"): - # list, tuple - other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - - elif is_timedelta64_dtype(other): + if is_timedelta64_dtype(other): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -556,47 +513,51 @@ def __rfloordiv__(self, other): raise TypeError("Cannot divide {typ} by {cls}" .format(typ=dtype, cls=type(self).__name__)) + @ops.unpack_and_defer def __mod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) + if other is NaT: + # convert back to something that floordiv will recognize + # as timedelta-like + other = np.timedelta64('NaT', 'ns') return self - (self // other) * other + @ops.unpack_and_defer def __rmod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) + if other is NaT: + # convert back to something that floordiv will recognize + # as timedelta-like + other = np.timedelta64('NaT', 'ns') return other - (other // self) * self + @ops.unpack_and_defer def __divmod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) + if other is NaT: + # convert back to something that floordiv will recognize + # as timedelta-like + other = np.timedelta64('NaT', 'ns') res1 = self // other res2 = self - res1 * other return res1, res2 + @ops.unpack_and_defer def __rdivmod__(self, other): # Note: This is a naive implementation, can likely be optimized - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented - - other = lib.item_from_zerodim(other) if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) + if other is NaT: + # convert back to something that floordiv will recognize + # as timedelta-like + other = np.timedelta64('NaT', 'ns') res1 = other // self res2 = other - res1 * self diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 63c0827cccb82..345b057c19aa9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -63,6 +63,8 @@ def _try_get_item(x): def _make_comparison_op(op, cls): + + # @ops.unpack_and_defer def cmp_method(self, other): if isinstance(other, (np.ndarray, Index, ABCSeries)): if other.ndim > 0 and len(self) != len(other): @@ -98,10 +100,9 @@ def cmp_method(self, other): def _make_arithmetic_op(op, cls): + @ops.unpack_and_defer def index_arithmetic_method(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - elif isinstance(other, ABCTimedeltaIndex): + if isinstance(other, ABCTimedeltaIndex): # Defer to subclass implementation return NotImplemented elif (isinstance(other, (np.ndarray, ABCTimedeltaArray)) and @@ -2201,6 +2202,7 @@ def _get_unique_index(self, dropna=False): # -------------------------------------------------------------------- # Arithmetic & Logical Methods + # @ops.unpack_and_defer def __add__(self, other): if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0da924de244ed..04aa95830777d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -576,10 +576,8 @@ def __getitem__(self, key): # fall back to Int64Index return super_getitem(key) + @ops.unpack_and_defer def __floordiv__(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - if is_integer(other) and other != 0: if (len(self) == 0 or self._start % other == 0 and @@ -610,10 +608,9 @@ def _make_evaluate_binop(op, step=False): if False, use the existing step """ + @ops.unpack_and_defer def _evaluate_numeric_binop(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - elif isinstance(other, ABCTimedeltaIndex): + if isinstance(other, ABCTimedeltaIndex): # Defer to TimedeltaIndex implementation return NotImplemented elif isinstance(other, (timedelta, np.timedelta64)): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 41e3f4581587e..a528d67a49802 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -7,6 +7,7 @@ from __future__ import division import datetime +from functools import wraps import operator import textwrap import warnings @@ -136,6 +137,49 @@ def maybe_upcast_for_op(obj): return obj +def unpack_and_defer(func): + """ + Decorate a binary method to return NotImplemented where appropriate, + unpack arguments, and validate argument shapes. + + Parameters + ---------- + func : binary method + + Returns + ------- + decorated : binary method + """ + + @wraps(func) + def method(self, other): + other = lib.item_from_zerodim(other) + # TODO: maybe_upcast_for_op? + + for abc_cls in [ABCDataFrame, ABCSeries, ABCIndexClass]: + if isinstance(self, abc_cls): + break + elif isinstance(other, abc_cls): + return NotImplemented + + if is_list_like(other): + if not hasattr(other, 'dtype'): + # list, tuple... + if np.ndim(other) == 0: + # i.e. generator + other = list(other) + other = np.array(other) + + if not hasattr(self, 'index') and len(other) != len(self): + # Series and DataFrame may perform alignment, so this check + # is only relevant for Index and ExtensionArray + raise ValueError("Lengths must match") + + return func(self, other) + + return method + + # ----------------------------------------------------------------------------- # Reversed Operations not available in the stdlib operator module. # Defining these instead of using lambdas allows us to reference them by name. @@ -1526,6 +1570,7 @@ def safe_na_op(lvalues, rvalues): lambda x: op(x, rvalues)) raise + # @unpack_and_defer def wrapper(left, right): if isinstance(right, ABCDataFrame): return NotImplemented @@ -1574,6 +1619,7 @@ def wrapper(left, right): return construct_result(left, result, index=left.index, name=res_name, dtype=None) + wrapper.__name__ = op_name return wrapper @@ -1646,6 +1692,7 @@ def na_op(x, y): return result + # @unpack_and_defer def wrapper(self, other, axis=None): # Validate the axis parameter if axis is not None: @@ -1802,6 +1849,7 @@ def na_op(x, y): fill_int = lambda x: x.fillna(0) fill_bool = lambda x: x.fillna(False).astype(bool) + # @unpack_and_defer def wrapper(self, other): is_self_int_dtype = is_integer_dtype(self.dtype) @@ -2232,10 +2280,9 @@ def _arith_method_SPARSE_SERIES(cls, op, special): """ op_name = _get_op_name(op, special) + @unpack_and_defer def wrapper(self, other): - if isinstance(other, ABCDataFrame): - return NotImplemented - elif isinstance(other, ABCSeries): + if isinstance(other, ABCSeries): if not isinstance(other, ABCSparseSeries): other = other.to_sparse(fill_value=self.fill_value) return _sparse_series_op(self, other, op, op_name) @@ -2271,13 +2318,11 @@ def _arith_method_SPARSE_ARRAY(cls, op, special): """ op_name = _get_op_name(op, special) + @unpack_and_defer def wrapper(self, other): from pandas.core.arrays.sparse.array import ( SparseArray, _sparse_array_op, _wrap_result, _get_fill) if isinstance(other, np.ndarray): - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. {other}" - .format(self=len(self), other=len(other))) if not isinstance(other, SparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 5404d3f5f1915..6ff67fec11b4f 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1409,7 +1409,9 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box_with_array) - with pytest.raises(TypeError): + with pytest.raises(ValueError): + # length check occurs before type check, otherwise this would be + # a TypeError idx * idx[:3] with pytest.raises(ValueError): idx * np.array([1, 2]) From b33f45621c4722186925f59da74db534293bea26 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 08:52:07 -0800 Subject: [PATCH 2/9] fix errors checked in test_td64arr_mul_too_short_raises --- pandas/core/arrays/categorical.py | 2 ++ pandas/core/arrays/datetimelike.py | 4 ++++ pandas/core/arrays/datetimes.py | 1 + pandas/core/arrays/integer.py | 4 ++++ pandas/core/arrays/period.py | 1 + pandas/core/arrays/sparse.py | 2 +- pandas/core/arrays/timedeltas.py | 5 +---- pandas/core/ops.py | 4 ++-- pandas/tests/arithmetic/test_timedelta64.py | 12 +++++++++--- 9 files changed, 25 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6ccb8dc5d2725..713f62cb879eb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -53,6 +53,8 @@ def _cat_compare_op(op): + + # @ops.unpack_and_defer def f(self, other): # On python2, you can usually compare any type to any type, and # Categoricals can be seen as a custom type, but having different diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ceaf9e748fe5a..8259338b74bf6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,6 +36,8 @@ def _make_comparison_op(cls, op): # TODO: share code with indexes.base version? Main difference is that # the block for MultiIndex was removed here. + + # @ops.unpack_and_defer def cmp_method(self, other): if isinstance(other, ABCDataFrame): return NotImplemented @@ -852,6 +854,7 @@ def _time_shift(self, periods, freq=None): return self._generate_range(start=start, end=end, periods=None, freq=self.freq) + # @ops.unpack_and_defer def __add__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): @@ -913,6 +916,7 @@ def __radd__(self, other): # alias for __add__ return self.__add__(other) + # @ops.unpack_and_defer def __sub__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 744e90caeb225..dd7a318c76392 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -93,6 +93,7 @@ def _dt_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False + # @ops.unpack_and_defer def wrapper(self, other): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 38dc68e8f77a3..4e5c857ed9553 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -483,6 +483,8 @@ def _values_for_argsort(self): @classmethod def _create_comparison_method(cls, op): + + # @ops.unwrap_and_defer def cmp_method(self, other): op_name = op.__name__ @@ -573,6 +575,8 @@ def _maybe_mask_result(self, result, mask, other, op_name): @classmethod def _create_arithmetic_method(cls, op): + + # @ops.unwrap_and_defer def integer_arithmetic_method(self, other): op_name = op.__name__ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d9dde1c699761..fc54ed1bde3bd 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -51,6 +51,7 @@ def _period_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False + # @ops.unwrap_and_defer def wrapper(self, other): op = getattr(self.asi8, opname) # We want to eventually defer to the Series or PeriodIndex (which will diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index c8f10802fc7ff..e48aec9d35b53 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -31,12 +31,12 @@ ABCIndexClass, ABCSeries, ABCSparseSeries) from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core import ops from pandas.core.missing import interpolate_2d import pandas.io.formats.printing as printing diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 95cd935260101..2df2ecb94901a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -73,6 +73,7 @@ def _td_array_cmp(cls, op): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) + # @ops.unpack_and_defer def wrapper(self, other): if _is_convertible_to_td(other) or other is NaT: try: @@ -318,10 +319,6 @@ def __mul__(self, other): freq = self.freq * other return type(self)(result, freq=freq) - if not hasattr(other, "dtype"): - # list, tuple - other = np.array(other) - if is_object_dtype(other): # this multiplication will succeed only if all elements of other # are int or float scalars, so we will end up with diff --git a/pandas/core/ops.py b/pandas/core/ops.py index a528d67a49802..9b83faa30b2fc 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -171,8 +171,8 @@ def method(self, other): other = np.array(other) if not hasattr(self, 'index') and len(other) != len(self): - # Series and DataFrame may perform alignment, so this check - # is only relevant for Index and ExtensionArray + # Series and DataFrame may have alignment calls after this, + # so we cannot do a length check yet raise ValueError("Lengths must match") return func(self, other) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 6ff67fec11b4f..074a7c5ffc24c 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1409,10 +1409,16 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box_with_array) - with pytest.raises(ValueError): - # length check occurs before type check, otherwise this would be - # a TypeError + + # For TimedeltaArray and TimedeltaIndex, the length check occurs + # before a TypeError can occur. For Series and DataFrame, + # idx[:3] gets reindexed to match idx, so a TypeError is raised + err = ValueError + if box_with_array in [pd.Series, pd.DataFrame]: + err = TypeError + with pytest.raises(err): idx * idx[:3] + with pytest.raises(ValueError): idx * np.array([1, 2]) From 2f929af6611f6615fcc94027dbc25b7199d5627b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 11:08:50 -0800 Subject: [PATCH 3/9] enable for Integer comparisons --- pandas/core/arrays/categorical.py | 3 +++ pandas/core/arrays/integer.py | 17 +++-------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 713f62cb879eb..1784b7400c735 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -54,6 +54,9 @@ def _cat_compare_op(op): + # Note: using unpack_and_defer here doesn't break any tests, but the + # behavior here is idiosyncratic enough that I'm not confident enough + # to change it. # @ops.unpack_and_defer def f(self, other): # On python2, you can usually compare any type to any type, and diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4e5c857ed9553..c59d3c4c1dbe6 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, notna -from pandas.core import nanops +from pandas.core import nanops, ops from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin @@ -484,26 +484,15 @@ def _values_for_argsort(self): @classmethod def _create_comparison_method(cls, op): - # @ops.unwrap_and_defer + @ops.unpack_and_defer def cmp_method(self, other): op_name = op.__name__ mask = None - if isinstance(other, (ABCSeries, ABCIndexClass)): - # Rely on pandas to unbox and dispatch to us. - return NotImplemented - if isinstance(other, IntegerArray): other, mask = other._data, other._mask - elif is_list_like(other): - other = np.asarray(other) - if other.ndim > 0 and len(self) != len(other): - raise ValueError('Lengths must match to compare') - - other = lib.item_from_zerodim(other) - # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future with warnings.catch_warnings(): @@ -576,7 +565,7 @@ def _maybe_mask_result(self, result, mask, other, op_name): @classmethod def _create_arithmetic_method(cls, op): - # @ops.unwrap_and_defer + # @ops.unpack_and_defer def integer_arithmetic_method(self, other): op_name = op.__name__ From 29212f2cf14837db7f48c1c345d7349f679795c7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 12:15:14 -0800 Subject: [PATCH 4/9] fixup remove unused imports --- pandas/core/arrays/timedeltas.py | 3 +-- pandas/core/indexes/range.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index c26ff6a69b931..4884b3b774b21 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -19,8 +19,7 @@ is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) +from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex from pandas.core.dtypes.missing import isna from pandas.core import ops diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 04aa95830777d..030bbab06a244 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,8 +14,7 @@ from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype) -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCSeries, ABCTimedeltaIndex) +from pandas.core.dtypes.generic import ABCTimedeltaIndex from pandas.core import ops import pandas.core.common as com From fc0d4135d3bccf8972ef896bfa1e245d825d7f5b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 14:05:37 -0800 Subject: [PATCH 5/9] enable for categorical, revert non-central, tests for categorical with DataFrame --- pandas/core/arrays/categorical.py | 8 ++---- pandas/core/arrays/timedeltas.py | 16 ------------ .../arrays/categorical/test_operators.py | 26 +++++++++++++++++++ 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1784b7400c735..4fba5ab0241e6 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -34,6 +34,7 @@ import pandas.core.common as com from pandas.core.config import get_option from pandas.core.missing import interpolate_2d +from pandas.core import ops from pandas.core.sorting import nargsort from pandas.io.formats import console @@ -54,18 +55,13 @@ def _cat_compare_op(op): - # Note: using unpack_and_defer here doesn't break any tests, but the - # behavior here is idiosyncratic enough that I'm not confident enough - # to change it. - # @ops.unpack_and_defer + @ops.unpack_and_defer def f(self, other): # On python2, you can usually compare any type to any type, and # Categoricals can be seen as a custom type, but having different # results depending whether categories are the same or not is kind of # insane, so be a bit stricter here and use the python3 idea of # comparing only things of equal type. - if isinstance(other, ABCSeries): - return NotImplemented if not self.ordered: if op in ['__lt__', '__gt__', '__le__', '__ge__']: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4884b3b774b21..3b2f561f6256a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -509,10 +509,6 @@ def __mod__(self, other): # Note: This is a naive implementation, can likely be optimized if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) - if other is NaT: - # convert back to something that floordiv will recognize - # as timedelta-like - other = np.timedelta64('NaT', 'ns') return self - (self // other) * other @ops.unpack_and_defer @@ -520,10 +516,6 @@ def __rmod__(self, other): # Note: This is a naive implementation, can likely be optimized if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) - if other is NaT: - # convert back to something that floordiv will recognize - # as timedelta-like - other = np.timedelta64('NaT', 'ns') return other - (other // self) * self @ops.unpack_and_defer @@ -531,10 +523,6 @@ def __divmod__(self, other): # Note: This is a naive implementation, can likely be optimized if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) - if other is NaT: - # convert back to something that floordiv will recognize - # as timedelta-like - other = np.timedelta64('NaT', 'ns') res1 = self // other res2 = self - res1 * other @@ -545,10 +533,6 @@ def __rdivmod__(self, other): # Note: This is a naive implementation, can likely be optimized if isinstance(other, (timedelta, np.timedelta64, Tick)): other = Timedelta(other) - if other is NaT: - # convert back to something that floordiv will recognize - # as timedelta-like - other = np.timedelta64('NaT', 'ns') res1 = other // self res2 = other - res1 * self diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index f216865faa2ad..2dc2d12f4d2dd 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import operator import numpy as np import pytest @@ -119,9 +120,34 @@ def f(): res = cat_rev > "b" tm.assert_numpy_array_equal(res, exp) + # check that zero-dim array gets unboxed + res = cat_rev > np.array("b") + tm.assert_numpy_array_equal(res, exp) + class TestCategoricalOps(object): + def test_compare_frame(self): + # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame + data = ["a", "b", 2, "a"] + cat = Categorical(data) + + df = DataFrame(cat) + + for op in [operator.eq, operator.ne, operator.ge, + operator.gt, operator.le, operator.lt]: + with pytest.raises(ValueError): + # alignment raises unless we transpose + op(cat, df) + + result = cat == df.T + expected = DataFrame([[True, True, True, True]]) + tm.assert_frame_equal(result, expected) + + result = cat[::-1] != df.T + expected = DataFrame([[False, True, True, False]]) + tm.assert_frame_equal(result, expected) + def test_datetime_categorical_comparison(self): dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True) tm.assert_numpy_array_equal(dt_cat > dt_cat[0], From 8ec63eb9e25d0fb54e7a84997bb3510498e762d8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 14:24:20 -0800 Subject: [PATCH 6/9] revert not-yet-ready --- pandas/core/arrays/datetimes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c2fc3f59d1867..3e2d8820c2904 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -505,6 +505,8 @@ def _assert_tzawareness_compat(self, other): def _sub_datetime_arraylike(self, other): """subtract DatetimeArray/Index or ndarray[datetime64]""" + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") if isinstance(other, np.ndarray): assert is_datetime64_dtype(other) From 5522b6c4fd3335f949e7a702b3be0f61138a0727 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 15:48:34 -0800 Subject: [PATCH 7/9] use decorator in periodarray comparisons, update test messages --- pandas/core/arrays/period.py | 12 ++---------- pandas/tests/arithmetic/test_period.py | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 982d593af3dce..f4cb0883e517f 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -28,6 +28,7 @@ from pandas.core.arrays import ExtensionArray, datetimelike as dtl import pandas.core.common as com from pandas.core.missing import backfill_1d, pad_1d +from pandas.core import ops from pandas.tseries import frequencies from pandas.tseries.offsets import Tick @@ -51,16 +52,9 @@ def _period_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False - # @ops.unwrap_and_defer + @ops.unpack_and_defer def wrapper(self, other): op = getattr(self.asi8, opname) - # We want to eventually defer to the Series or PeriodIndex (which will - # return here with an unboxed PeriodArray). But before we do that, - # we do a bit of validation on type (Period) and freq, so that our - # error messages are sensible - not_implemented = isinstance(other, (ABCSeries, ABCIndexClass)) - if not_implemented: - other = other._values if isinstance(other, Period): self._check_compatible_with(other) @@ -69,8 +63,6 @@ def wrapper(self, other): elif isinstance(other, cls): self._check_compatible_with(other) - if not_implemented: - return NotImplemented result = op(other.asi8) mask = self._isnan | other._isnan diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 9f436281de0a0..6359356455e18 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -134,7 +134,11 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): freq=freq) base = tm.box_expected(base, box_with_array) - msg = "Input has different freq=A-DEC from " + # which freq is freq1 vs freq2 depends on box_with_array + base_msg = (r"Input has different freq={freq1} from " + r"Period(|Index|Array)\(freq={freq2}\)") + + msg = base_msg.format(freq1="A-DEC", freq2=freq) with pytest.raises(IncompatibleFrequency, match=msg): base <= Period('2011', freq='A') @@ -143,11 +147,16 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): # TODO: Could parametrize over boxes for idx? idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') + + msg = base_msg.format(freq1=freq, freq2=idx.freqstr) + if box_with_array in [pd.Series, pd.DataFrame]: + msg = base_msg.format(freq1=idx.freqstr, freq2=freq) + with pytest.raises(IncompatibleFrequency, match=msg): base <= idx # Different frequency - msg = "Input has different freq=4M from " + msg = base_msg.format(freq1="4M", freq2=freq) with pytest.raises(IncompatibleFrequency, match=msg): base <= Period('2011', freq='4M') @@ -155,6 +164,11 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): Period('2011', freq='4M') >= base idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') + + msg = base_msg.format(freq1=freq, freq2=idx.freqstr) + if box_with_array in [pd.Series, pd.DataFrame]: + msg = base_msg.format(freq1=idx.freqstr, freq2=freq) + with pytest.raises(IncompatibleFrequency, match=msg): base <= idx @@ -206,7 +220,9 @@ def test_pi_cmp_nat_mismatched_freq_raises(self, freq): idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') - msg = "Input has different freq=4M from PeriodIndex" + msg = (r"Input has different freq={freq} from " + r"Period(Index|Array)\(freq=4M\)" + .format(freq=freq)) with pytest.raises(IncompatibleFrequency, match=msg): idx1 > diff From 5b8160000c02dd401a8904261abf58f346ac2c98 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 16:16:53 -0800 Subject: [PATCH 8/9] revert application --- pandas/core/arrays/integer.py | 1 + pandas/core/arrays/period.py | 12 ++++++++++-- pandas/core/arrays/sparse.py | 2 ++ pandas/tests/arithmetic/test_period.py | 22 +++------------------- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c59d3c4c1dbe6..facff9e0934b7 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -484,6 +484,7 @@ def _values_for_argsort(self): @classmethod def _create_comparison_method(cls, op): + # TODO: needs tests for deferring to DataFrame @ops.unpack_and_defer def cmp_method(self, other): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f4cb0883e517f..12df91818f762 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -28,7 +28,6 @@ from pandas.core.arrays import ExtensionArray, datetimelike as dtl import pandas.core.common as com from pandas.core.missing import backfill_1d, pad_1d -from pandas.core import ops from pandas.tseries import frequencies from pandas.tseries.offsets import Tick @@ -52,9 +51,16 @@ def _period_array_cmp(cls, op): opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False - @ops.unpack_and_defer + # @ops.unpack_and_defer def wrapper(self, other): op = getattr(self.asi8, opname) + # We want to eventually defer to the Series or PeriodIndex (which will + # return here with an unboxed PeriodArray). But before we do that, + # we do a bit of validation on type (Period) and freq, so that our + # error messages are sensible + not_implemented = isinstance(other, (ABCSeries, ABCIndexClass)) + if not_implemented: + other = other._values if isinstance(other, Period): self._check_compatible_with(other) @@ -63,6 +69,8 @@ def wrapper(self, other): elif isinstance(other, cls): self._check_compatible_with(other) + if not_implemented: + return NotImplemented result = op(other.asi8) mask = self._isnan | other._isnan diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index e48aec9d35b53..ba2fef5f5bb73 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1652,6 +1652,7 @@ def sparse_unary_method(self): @classmethod def _create_arithmetic_method(cls, op): + # TODO: needs test for deferring to DataFrame, op with 0-dim @ops.unpack_and_defer def sparse_arithmetic_method(self, other): op_name = op.__name__ @@ -1689,6 +1690,7 @@ def sparse_arithmetic_method(self, other): @classmethod def _create_comparison_method(cls, op): + # TODO: needs test for deferring to DataFrame, op with 0-dim @ops.unpack_and_defer def cmp_method(self, other): op_name = op.__name__ diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 6359356455e18..9f436281de0a0 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -134,11 +134,7 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): freq=freq) base = tm.box_expected(base, box_with_array) - # which freq is freq1 vs freq2 depends on box_with_array - base_msg = (r"Input has different freq={freq1} from " - r"Period(|Index|Array)\(freq={freq2}\)") - - msg = base_msg.format(freq1="A-DEC", freq2=freq) + msg = "Input has different freq=A-DEC from " with pytest.raises(IncompatibleFrequency, match=msg): base <= Period('2011', freq='A') @@ -147,16 +143,11 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): # TODO: Could parametrize over boxes for idx? idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - - msg = base_msg.format(freq1=freq, freq2=idx.freqstr) - if box_with_array in [pd.Series, pd.DataFrame]: - msg = base_msg.format(freq1=idx.freqstr, freq2=freq) - with pytest.raises(IncompatibleFrequency, match=msg): base <= idx # Different frequency - msg = base_msg.format(freq1="4M", freq2=freq) + msg = "Input has different freq=4M from " with pytest.raises(IncompatibleFrequency, match=msg): base <= Period('2011', freq='4M') @@ -164,11 +155,6 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): Period('2011', freq='4M') >= base idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - - msg = base_msg.format(freq1=freq, freq2=idx.freqstr) - if box_with_array in [pd.Series, pd.DataFrame]: - msg = base_msg.format(freq1=idx.freqstr, freq2=freq) - with pytest.raises(IncompatibleFrequency, match=msg): base <= idx @@ -220,9 +206,7 @@ def test_pi_cmp_nat_mismatched_freq_raises(self, freq): idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') - msg = (r"Input has different freq={freq} from " - r"Period(Index|Array)\(freq=4M\)" - .format(freq=freq)) + msg = "Input has different freq=4M from PeriodIndex" with pytest.raises(IncompatibleFrequency, match=msg): idx1 > diff From 4d481008a686f5a99ecfc337fbf1389c367e2f35 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 17:53:47 -0800 Subject: [PATCH 9/9] isort fixup --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4fba5ab0241e6..5f46d27fe243a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,6 +27,7 @@ from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna +from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import factorize, take, take_1d, unique1d @@ -34,7 +35,6 @@ import pandas.core.common as com from pandas.core.config import get_option from pandas.core.missing import interpolate_2d -from pandas.core import ops from pandas.core.sorting import nargsort from pandas.io.formats import console