-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: decorator for ops boilerplate #24282
Changes from 4 commits
c73f714
b33f456
f516968
2f929af
29212f2
fc0d413
84cedf7
8ec63eb
30992b2
5522b6c
5b81600
4d48100
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,8 @@ | |
def _make_comparison_op(cls, op): | ||
# TODO: share code with indexes.base version? Main difference is that | ||
# the block for MultiIndex was removed here. | ||
|
||
# @ops.unpack_and_defer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using this here leads to recursion errors, related to the fact that this only returns NotImplemented for ABCDataFrame. I think this will be easier to resolve after the change to composition. |
||
def cmp_method(self, other): | ||
if isinstance(other, ABCDataFrame): | ||
return NotImplemented | ||
|
@@ -885,6 +887,7 @@ def _time_shift(self, periods, freq=None): | |
return self._generate_range(start=start, end=end, periods=None, | ||
freq=self.freq) | ||
|
||
# @ops.unpack_and_defer | ||
def __add__(self, other): | ||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (ABCSeries, ABCDataFrame)): | ||
|
@@ -946,6 +949,7 @@ def __radd__(self, other): | |
# alias for __add__ | ||
return self.__add__(other) | ||
|
||
# @ops.unpack_and_defer | ||
def __sub__(self, other): | ||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (ABCSeries, ABCDataFrame)): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ | |
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries | ||
from pandas.core.dtypes.missing import isna, notna | ||
|
||
from pandas.core import nanops | ||
from pandas.core import nanops, ops | ||
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin | ||
|
||
|
||
|
@@ -483,25 +483,16 @@ def _values_for_argsort(self): | |
|
||
@classmethod | ||
def _create_comparison_method(cls, op): | ||
|
||
@ops.unpack_and_defer | ||
def cmp_method(self, other): | ||
|
||
op_name = op.__name__ | ||
mask = None | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
# Rely on pandas to unbox and dispatch to us. | ||
return NotImplemented | ||
|
||
if isinstance(other, IntegerArray): | ||
other, mask = other._data, other._mask | ||
|
||
elif is_list_like(other): | ||
other = np.asarray(other) | ||
if other.ndim > 0 and len(self) != len(other): | ||
raise ValueError('Lengths must match to compare') | ||
|
||
other = lib.item_from_zerodim(other) | ||
|
||
# numpy will show a DeprecationWarning on invalid elementwise | ||
# comparisons, this will raise in the future | ||
with warnings.catch_warnings(): | ||
|
@@ -573,6 +564,8 @@ def _maybe_mask_result(self, result, mask, other, op_name): | |
|
||
@classmethod | ||
def _create_arithmetic_method(cls, op): | ||
|
||
# @ops.unpack_and_defer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback using the decorator here breaks a bunch of tests, specifically ones operating with DataFrame. Why does this return NotImplemented for Series/Index but not DataFrame? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Below, why use |
||
def integer_arithmetic_method(self, other): | ||
|
||
op_name = op.__name__ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,7 @@ def _period_array_cmp(cls, op): | |
opname = '__{name}__'.format(name=op.__name__) | ||
nat_result = True if opname == '__ne__' else False | ||
|
||
# @ops.unwrap_and_defer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TomAugspurger any idea why not returning NotImplemented for DataFrame? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure... I vaguely remember a broadcasting error, but that may have been user error. |
||
def wrapper(self, other): | ||
op = getattr(self.asi8, opname) | ||
# We want to eventually defer to the Series or PeriodIndex (which will | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ | |
ABCIndexClass, ABCSeries, ABCSparseSeries) | ||
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna | ||
|
||
from pandas.core import ops | ||
from pandas.core.accessor import PandasDelegate, delegate_names | ||
import pandas.core.algorithms as algos | ||
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin | ||
|
@@ -1650,13 +1651,11 @@ def sparse_unary_method(self): | |
|
||
@classmethod | ||
def _create_arithmetic_method(cls, op): | ||
|
||
@ops.unpack_and_defer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Both here and below this is technically a change since in the status quo this doesn't defer to DataFrame. Also doesn't call item_from_zerodim ATM. |
||
def sparse_arithmetic_method(self, other): | ||
op_name = op.__name__ | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
# Rely on pandas to dispatch to us. | ||
return NotImplemented | ||
|
||
if isinstance(other, SparseArray): | ||
return _sparse_array_op(self, other, op, op_name) | ||
|
||
|
@@ -1678,10 +1677,6 @@ def sparse_arithmetic_method(self, other): | |
with np.errstate(all='ignore'): | ||
# TODO: delete sparse stuff in core/ops.py | ||
# TODO: look into _wrap_result | ||
if len(self) != len(other): | ||
raise AssertionError( | ||
("length mismatch: {self} vs. {other}".format( | ||
self=len(self), other=len(other)))) | ||
if not isinstance(other, SparseArray): | ||
dtype = getattr(other, 'dtype', None) | ||
other = SparseArray(other, fill_value=self.fill_value, | ||
|
@@ -1693,26 +1688,20 @@ def sparse_arithmetic_method(self, other): | |
|
||
@classmethod | ||
def _create_comparison_method(cls, op): | ||
|
||
@ops.unpack_and_defer | ||
def cmp_method(self, other): | ||
op_name = op.__name__ | ||
|
||
if op_name in {'and_', 'or_'}: | ||
op_name = op_name[:-1] | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
# Rely on pandas to unbox and dispatch to us. | ||
return NotImplemented | ||
|
||
if not is_scalar(other) and not isinstance(other, type(self)): | ||
# convert list-like to ndarray | ||
other = np.asarray(other) | ||
|
||
if isinstance(other, np.ndarray): | ||
# TODO: make this more flexible than just ndarray... | ||
if len(self) != len(other): | ||
raise AssertionError("length mismatch: {self} vs. {other}" | ||
.format(self=len(self), | ||
other=len(other))) | ||
other = SparseArray(other, fill_value=self.fill_value) | ||
|
||
if isinstance(other, SparseArray): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,6 +74,7 @@ def _td_array_cmp(cls, op): | |
|
||
meth = getattr(dtl.DatetimeLikeArrayMixin, opname) | ||
|
||
# @ops.unpack_and_defer | ||
def wrapper(self, other): | ||
if _is_convertible_to_td(other) or other is NaT: | ||
try: | ||
|
@@ -302,11 +303,8 @@ def _addsub_offset_array(self, other, op): | |
raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" | ||
.format(cls=type(self).__name__)) | ||
|
||
@ops.unpack_and_defer | ||
def __mul__(self, other): | ||
other = lib.item_from_zerodim(other) | ||
|
||
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
if is_scalar(other): | ||
# numpy will accept float and int, raise TypeError for others | ||
|
@@ -316,14 +314,6 @@ def __mul__(self, other): | |
freq = self.freq * other | ||
return type(self)(result, freq=freq) | ||
|
||
if not hasattr(other, "dtype"): | ||
# list, tuple | ||
other = np.array(other) | ||
if len(other) != len(self) and not is_timedelta64_dtype(other): | ||
# Exclude timedelta64 here so we correctly raise TypeError | ||
# for that instead of ValueError | ||
raise ValueError("Cannot multiply with unequal lengths") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a small change (reflected in a test below). |
||
|
||
if is_object_dtype(other): | ||
# this multiplication will succeed only if all elements of other | ||
# are int or float scalars, so we will end up with | ||
|
@@ -338,12 +328,9 @@ def __mul__(self, other): | |
|
||
__rmul__ = __mul__ | ||
|
||
@ops.unpack_and_defer | ||
def __truediv__(self, other): | ||
# timedelta / X is well-defined for timedelta-like or numeric X | ||
other = lib.item_from_zerodim(other) | ||
|
||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
|
@@ -365,14 +352,7 @@ def __truediv__(self, other): | |
freq = self.freq.delta / other | ||
return type(self)(result, freq=freq) | ||
|
||
if not hasattr(other, "dtype"): | ||
# e.g. list, tuple | ||
other = np.array(other) | ||
|
||
if len(other) != len(self): | ||
raise ValueError("Cannot divide vectors with unequal lengths") | ||
|
||
elif is_timedelta64_dtype(other): | ||
if is_timedelta64_dtype(other): | ||
# let numpy handle it | ||
return self._data / other | ||
|
||
|
@@ -388,12 +368,9 @@ def __truediv__(self, other): | |
result = self._data / other | ||
return type(self)(result) | ||
|
||
@ops.unpack_and_defer | ||
def __rtruediv__(self, other): | ||
# X / timedelta is defined only for timedelta-like X | ||
other = lib.item_from_zerodim(other) | ||
|
||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
|
@@ -411,14 +388,7 @@ def __rtruediv__(self, other): | |
.format(typ=type(other).__name__, | ||
cls=type(self).__name__)) | ||
|
||
if not hasattr(other, "dtype"): | ||
# e.g. list, tuple | ||
other = np.array(other) | ||
|
||
if len(other) != len(self): | ||
raise ValueError("Cannot divide vectors with unequal lengths") | ||
|
||
elif is_timedelta64_dtype(other): | ||
if is_timedelta64_dtype(other): | ||
# let numpy handle it | ||
return other / self._data | ||
|
||
|
@@ -438,11 +408,9 @@ def __rtruediv__(self, other): | |
__div__ = __truediv__ | ||
__rdiv__ = __rtruediv__ | ||
|
||
@ops.unpack_and_defer | ||
def __floordiv__(self, other): | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if is_scalar(other): | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
|
@@ -466,13 +434,7 @@ def __floordiv__(self, other): | |
freq = self.freq / other | ||
return type(self)(result.view('m8[ns]'), freq=freq) | ||
|
||
if not hasattr(other, "dtype"): | ||
# list, tuple | ||
other = np.array(other) | ||
if len(other) != len(self): | ||
raise ValueError("Cannot divide with unequal lengths") | ||
|
||
elif is_timedelta64_dtype(other): | ||
if is_timedelta64_dtype(other): | ||
other = type(self)(other) | ||
|
||
# numpy timedelta64 does not natively support floordiv, so operate | ||
|
@@ -501,11 +463,9 @@ def __floordiv__(self, other): | |
raise TypeError("Cannot divide {typ} by {cls}" | ||
.format(typ=dtype, cls=type(self).__name__)) | ||
|
||
@ops.unpack_and_defer | ||
def __rfloordiv__(self, other): | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if is_scalar(other): | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
|
@@ -523,13 +483,7 @@ def __rfloordiv__(self, other): | |
.format(typ=type(other).__name__, | ||
cls=type(self).__name__)) | ||
|
||
if not hasattr(other, "dtype"): | ||
# list, tuple | ||
other = np.array(other) | ||
if len(other) != len(self): | ||
raise ValueError("Cannot divide with unequal lengths") | ||
|
||
elif is_timedelta64_dtype(other): | ||
if is_timedelta64_dtype(other): | ||
other = type(self)(other) | ||
|
||
# numpy timedelta64 does not natively support floordiv, so operate | ||
|
@@ -551,47 +505,51 @@ def __rfloordiv__(self, other): | |
raise TypeError("Cannot divide {typ} by {cls}" | ||
.format(typ=dtype, cls=type(self).__name__)) | ||
|
||
@ops.unpack_and_defer | ||
def __mod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
if other is NaT: | ||
# convert back to something that floordiv will recognize | ||
# as timedelta-like | ||
other = np.timedelta64('NaT', 'ns') | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return self - (self // other) * other | ||
|
||
@ops.unpack_and_defer | ||
def __rmod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
if other is NaT: | ||
# convert back to something that floordiv will recognize | ||
# as timedelta-like | ||
other = np.timedelta64('NaT', 'ns') | ||
return other - (other // self) * self | ||
|
||
@ops.unpack_and_defer | ||
def __divmod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
if other is NaT: | ||
# convert back to something that floordiv will recognize | ||
# as timedelta-like | ||
other = np.timedelta64('NaT', 'ns') | ||
|
||
res1 = self // other | ||
res2 = self - res1 * other | ||
return res1, res2 | ||
|
||
@ops.unpack_and_defer | ||
def __rdivmod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
if other is NaT: | ||
# convert back to something that floordiv will recognize | ||
# as timedelta-like | ||
other = np.timedelta64('NaT', 'ns') | ||
|
||
res1 = other // self | ||
res2 = other - res1 * self | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@TomAugspurger are you the right person to ask about Categorical methods? In particular, why does this only return NotImplemented for ABCSeries and not for ABCDataFrame and ABCIndexClass?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No idea. Returning NotImplemented seems reasonable, as long as it ends up back here with the unboxed values.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Categorical == DataFrame is weird both before and after
master
PR
If we transpose then we get all-True, the only difference being that in the PR the result is a DataFrame instead of an ndarray.