Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PERF] use numexpr in dispatch_to_series #22284

Merged
merged 7 commits into from
Sep 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 12 additions & 19 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4837,15 +4837,23 @@ def _arith_op(left, right):
copy=False)

def _combine_match_index(self, other, func, level=None):
assert isinstance(other, Series)
left, right = self.align(other, join='outer', axis=0, level=level,
copy=False)
assert left.index.equals(right.index)
new_data = func(left.values.T, right.values).T
return self._constructor(new_data,
index=left.index, columns=self.columns,
copy=False)

if left._is_mixed_type or right._is_mixed_type:
# operate column-wise; avoid costly object-casting in `.values`
return ops.dispatch_to_series(left, right, func)
else:
# fastpath --> operate directly on values
new_data = func(left.values.T, right.values).T
return self._constructor(new_data,
index=left.index, columns=self.columns,
copy=False)

def _combine_match_columns(self, other, func, level=None, try_cast=True):
assert isinstance(other, Series)
left, right = self.align(other, join='outer', axis=1, level=level,
copy=False)
assert left.columns.equals(right.index)
Expand All @@ -4864,21 +4872,6 @@ def _combine_const(self, other, func, errors='raise', try_cast=True):
try_cast=try_cast)
return self._constructor(new_data)

def _compare_frame(self, other, func, str_rep):
# compare_frame assumes self._indexed_same(other)

import pandas.core.computation.expressions as expressions

def _compare(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

new_data = expressions.evaluate(_compare, str_rep, self, other)
result = self._constructor(data=new_data, index=self.index,
copy=False)
result.columns = self.columns
return result

def combine(self, other, func, fill_value=None, overwrite=True):
"""
Perform column-wise combine with another DataFrame based on a
Expand Down
34 changes: 26 additions & 8 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# -----------------------------------------------------------------------------
# DataFrame

def dispatch_to_series(left, right, func):
def dispatch_to_series(left, right, func, str_rep=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.
Expand All @@ -1631,24 +1631,42 @@ def dispatch_to_series(left, right, func):
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None

Returns
-------
DataFrame
"""
# Note: we use iloc to access columns for compat with cases
# with non-unique columns.
import pandas.core.computation.expressions as expressions

right = lib.item_from_zerodim(right)
if lib.is_scalar(right):
new_data = {i: func(left.iloc[:, i], right)
for i in range(len(left.columns))}

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)
new_data = {i: func(left.iloc[:, i], right.iloc[:, i])
for i in range(len(left.columns))}

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries):
assert right.index.equals(left.index) # Handle other cases later

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
Expand Down Expand Up @@ -1818,7 +1836,7 @@ def f(self, other, axis=default_axis, level=None):
if not self._indexed_same(other):
self, other = self.align(other, 'outer',
level=level, copy=False)
return self._compare_frame(other, na_op, str_rep)
return dispatch_to_series(self, other, na_op, str_rep)

elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, na_op,
Expand All @@ -1843,7 +1861,7 @@ def f(self, other):
if not self._indexed_same(other):
raise ValueError('Can only compare identically-labeled '
'DataFrame objects')
return self._compare_frame(other, func, str_rep)
return dispatch_to_series(self, other, func, str_rep)

elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, func,
Expand Down