Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: decorator for ops boilerplate #24282

Closed
wants to merge 12 commits into from
Closed
5 changes: 5 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@


def _cat_compare_op(op):

# Note: using unpack_and_defer here doesn't break any tests, but the
# behavior here is idiosyncratic enough that I'm not confident enough
# to change it.
# @ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger are you the right person to ask about Categorical methods? In particular, why does this only return NotImplemented for ABCSeries and not for ABCDataFrame and ABCIndexClass?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No idea. Returning NotImplemented seems reasonable, as long as it ends up back here with the unboxed values.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Categorical == DataFrame is weird both before and after

data = ["a", "b", 2, "a"]
cat = pd.Categorical(data)
idx = pd.CategoricalIndex(cat)
ser = pd.Series(cat)
df = pd.DataFrame(cat)

master

>>> cat == df
array([[ True, False, False,  True],
       [False,  True, False, False],
       [False, False,  True, False],
       [ True, False, False,  True]])

PR

>>> cat == df
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pandas/core/ops.py", line 2127, in f
    other = _align_method_FRAME(self, other, axis=None)
  File "pandas/core/ops.py", line 2021, in _align_method_FRAME
    right = to_series(right)
  File "pandas/core/ops.py", line 1983, in to_series
    given_len=len(right)))
ValueError: Unable to coerce to Series, length must be 1: given 4

If we transpose then we get all-True, the only difference being that in the PR the result is a DataFrame instead of an ndarray.

def f(self, other):
# On python2, you can usually compare any type to any type, and
# Categoricals can be seen as a custom type, but having different
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
def _make_comparison_op(cls, op):
# TODO: share code with indexes.base version? Main difference is that
# the block for MultiIndex was removed here.

# @ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using this here leads to recursion errors, related to the fact that this only returns NotImplemented for ABCDataFrame. I think this will be easier to resolve after the change to composition.

def cmp_method(self, other):
if isinstance(other, ABCDataFrame):
return NotImplemented
Expand Down Expand Up @@ -885,6 +887,7 @@ def _time_shift(self, periods, freq=None):
return self._generate_range(start=start, end=end, periods=None,
freq=self.freq)

# @ops.unpack_and_defer
def __add__(self, other):
other = lib.item_from_zerodim(other)
if isinstance(other, (ABCSeries, ABCDataFrame)):
Expand Down Expand Up @@ -946,6 +949,7 @@ def __radd__(self, other):
# alias for __add__
return self.__add__(other)

# @ops.unpack_and_defer
def __sub__(self, other):
other = lib.item_from_zerodim(other)
if isinstance(other, (ABCSeries, ABCDataFrame)):
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def _dt_array_cmp(cls, op):
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

# @ops.unpack_and_defer
def wrapper(self, other):
meth = getattr(dtl.DatetimeLikeArrayMixin, opname)

Expand Down Expand Up @@ -504,8 +505,6 @@ def _assert_tzawareness_compat(self, other):

def _sub_datetime_arraylike(self, other):
"""subtract DatetimeArray/Index or ndarray[datetime64]"""
if len(self) != len(other):
raise ValueError("cannot add indices of unequal length")
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

if isinstance(other, np.ndarray):
assert is_datetime64_dtype(other)
Expand Down
17 changes: 5 additions & 12 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna, notna

from pandas.core import nanops
from pandas.core import nanops, ops
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin


Expand Down Expand Up @@ -483,25 +483,16 @@ def _values_for_argsort(self):

@classmethod
def _create_comparison_method(cls, op):

@ops.unpack_and_defer
def cmp_method(self, other):

op_name = op.__name__
mask = None

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
return NotImplemented

if isinstance(other, IntegerArray):
other, mask = other._data, other._mask

elif is_list_like(other):
other = np.asarray(other)
if other.ndim > 0 and len(self) != len(other):
raise ValueError('Lengths must match to compare')

other = lib.item_from_zerodim(other)

# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
with warnings.catch_warnings():
Expand Down Expand Up @@ -573,6 +564,8 @@ def _maybe_mask_result(self, result, mask, other, op_name):

@classmethod
def _create_arithmetic_method(cls, op):

# @ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback using the decorator here breaks a bunch of tests, specifically ones operating with DataFrame. Why does this return NotImplemented for Series/Index but not DataFrame?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Below, why use other = other.item() instead of item_from_zerodim?

def integer_arithmetic_method(self, other):

op_name = op.__name__
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def _period_array_cmp(cls, op):
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

# @ops.unwrap_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger any idea why not returning NotImplemented for DataFrame?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure... I vaguely remember a broadcasting error, but that may have been user error.

def wrapper(self, other):
op = getattr(self.asi8, opname)
# We want to eventually defer to the Series or PeriodIndex (which will
Expand Down
21 changes: 5 additions & 16 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
ABCIndexClass, ABCSeries, ABCSparseSeries)
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

from pandas.core import ops
from pandas.core.accessor import PandasDelegate, delegate_names
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
Expand Down Expand Up @@ -1650,13 +1651,11 @@ def sparse_unary_method(self):

@classmethod
def _create_arithmetic_method(cls, op):

@ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both here and below this is technically a change since in the status quo this doesn't defer to DataFrame. Also doesn't call item_from_zerodim ATM.

def sparse_arithmetic_method(self, other):
op_name = op.__name__

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to dispatch to us.
return NotImplemented

if isinstance(other, SparseArray):
return _sparse_array_op(self, other, op, op_name)

Expand All @@ -1678,10 +1677,6 @@ def sparse_arithmetic_method(self, other):
with np.errstate(all='ignore'):
# TODO: delete sparse stuff in core/ops.py
# TODO: look into _wrap_result
if len(self) != len(other):
raise AssertionError(
("length mismatch: {self} vs. {other}".format(
self=len(self), other=len(other))))
if not isinstance(other, SparseArray):
dtype = getattr(other, 'dtype', None)
other = SparseArray(other, fill_value=self.fill_value,
Expand All @@ -1693,26 +1688,20 @@ def sparse_arithmetic_method(self, other):

@classmethod
def _create_comparison_method(cls, op):

@ops.unpack_and_defer
def cmp_method(self, other):
op_name = op.__name__

if op_name in {'and_', 'or_'}:
op_name = op_name[:-1]

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
return NotImplemented

if not is_scalar(other) and not isinstance(other, type(self)):
# convert list-like to ndarray
other = np.asarray(other)

if isinstance(other, np.ndarray):
# TODO: make this more flexible than just ndarray...
if len(self) != len(other):
raise AssertionError("length mismatch: {self} vs. {other}"
.format(self=len(self),
other=len(other)))
other = SparseArray(other, fill_value=self.fill_value)

if isinstance(other, SparseArray):
Expand Down
102 changes: 30 additions & 72 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def _td_array_cmp(cls, op):

meth = getattr(dtl.DatetimeLikeArrayMixin, opname)

# @ops.unpack_and_defer
def wrapper(self, other):
if _is_convertible_to_td(other) or other is NaT:
try:
Expand Down Expand Up @@ -302,11 +303,8 @@ def _addsub_offset_array(self, other, op):
raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}"
.format(cls=type(self).__name__))

@ops.unpack_and_defer
def __mul__(self, other):
other = lib.item_from_zerodim(other)

if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
return NotImplemented

if is_scalar(other):
# numpy will accept float and int, raise TypeError for others
Expand All @@ -316,14 +314,6 @@ def __mul__(self, other):
freq = self.freq * other
return type(self)(result, freq=freq)

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self) and not is_timedelta64_dtype(other):
# Exclude timedelta64 here so we correctly raise TypeError
# for that instead of ValueError
raise ValueError("Cannot multiply with unequal lengths")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a small change (reflected in a test below). tdarr * tdarr[:-1] checks for length mismatch before checking for dtype compat, so now raises ValueError instead of TypeError.


if is_object_dtype(other):
# this multiplication will succeed only if all elements of other
# are int or float scalars, so we will end up with
Expand All @@ -338,12 +328,9 @@ def __mul__(self, other):

__rmul__ = __mul__

@ops.unpack_and_defer
def __truediv__(self, other):
# timedelta / X is well-defined for timedelta-like or numeric X
other = lib.item_from_zerodim(other)

if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
Expand All @@ -365,14 +352,7 @@ def __truediv__(self, other):
freq = self.freq.delta / other
return type(self)(result, freq=freq)

if not hasattr(other, "dtype"):
# e.g. list, tuple
other = np.array(other)

if len(other) != len(self):
raise ValueError("Cannot divide vectors with unequal lengths")

elif is_timedelta64_dtype(other):
if is_timedelta64_dtype(other):
# let numpy handle it
return self._data / other

Expand All @@ -388,12 +368,9 @@ def __truediv__(self, other):
result = self._data / other
return type(self)(result)

@ops.unpack_and_defer
def __rtruediv__(self, other):
# X / timedelta is defined only for timedelta-like X
other = lib.item_from_zerodim(other)

if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
Expand All @@ -411,14 +388,7 @@ def __rtruediv__(self, other):
.format(typ=type(other).__name__,
cls=type(self).__name__))

if not hasattr(other, "dtype"):
# e.g. list, tuple
other = np.array(other)

if len(other) != len(self):
raise ValueError("Cannot divide vectors with unequal lengths")

elif is_timedelta64_dtype(other):
if is_timedelta64_dtype(other):
# let numpy handle it
return other / self._data

Expand All @@ -438,11 +408,9 @@ def __rtruediv__(self, other):
__div__ = __truediv__
__rdiv__ = __rtruediv__

@ops.unpack_and_defer
def __floordiv__(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if is_scalar(other):
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
Expand All @@ -466,13 +434,7 @@ def __floordiv__(self, other):
freq = self.freq / other
return type(self)(result.view('m8[ns]'), freq=freq)

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self):
raise ValueError("Cannot divide with unequal lengths")

elif is_timedelta64_dtype(other):
if is_timedelta64_dtype(other):
other = type(self)(other)

# numpy timedelta64 does not natively support floordiv, so operate
Expand Down Expand Up @@ -501,11 +463,9 @@ def __floordiv__(self, other):
raise TypeError("Cannot divide {typ} by {cls}"
.format(typ=dtype, cls=type(self).__name__))

@ops.unpack_and_defer
def __rfloordiv__(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if is_scalar(other):
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
Expand All @@ -523,13 +483,7 @@ def __rfloordiv__(self, other):
.format(typ=type(other).__name__,
cls=type(self).__name__))

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self):
raise ValueError("Cannot divide with unequal lengths")

elif is_timedelta64_dtype(other):
if is_timedelta64_dtype(other):
other = type(self)(other)

# numpy timedelta64 does not natively support floordiv, so operate
Expand All @@ -551,47 +505,51 @@ def __rfloordiv__(self, other):
raise TypeError("Cannot divide {typ} by {cls}"
.format(typ=dtype, cls=type(self).__name__))

@ops.unpack_and_defer
def __mod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# convert back to something that floordiv will recognize
# as timedelta-like
other = np.timedelta64('NaT', 'ns')
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
return self - (self // other) * other

@ops.unpack_and_defer
def __rmod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# convert back to something that floordiv will recognize
# as timedelta-like
other = np.timedelta64('NaT', 'ns')
return other - (other // self) * self

@ops.unpack_and_defer
def __divmod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# convert back to something that floordiv will recognize
# as timedelta-like
other = np.timedelta64('NaT', 'ns')

res1 = self // other
res2 = self - res1 * other
return res1, res2

@ops.unpack_and_defer
def __rdivmod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# convert back to something that floordiv will recognize
# as timedelta-like
other = np.timedelta64('NaT', 'ns')

res1 = other // self
res2 = other - res1 * self
Expand Down
Loading