From f46c58b1df41f68d9c2baec7138adc90124cb781 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 10 Aug 2018 21:05:28 -0700 Subject: [PATCH 1/4] fake commit so I can run asv --- asv_bench/asv.conf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 9c333f62810f4..61cad8a292072 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["3.6"], + "pythons": ["2.7"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty From 0cf9fa0093f47459f5c1ada118cf1b60b2816835 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 10 Aug 2018 21:53:10 -0700 Subject: [PATCH 2/4] fake commit so i can run asv --- pandas/core/frame.py | 31 +++++++++++++++++++++++-------- pandas/core/ops.py | 35 +++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 638129291b495..535355b457368 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4914,21 +4914,31 @@ def _arith_op(left, right): return ops.dispatch_to_series(this, other, _arith_op) else: result = _arith_op(this.values, other.values) - - return self._constructor(result, index=new_index, columns=new_columns, - copy=False) + return self._constructor(result, index=new_index, + columns=new_columns, + copy=False) def _combine_match_index(self, other, func, level=None): + assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=0, level=level, copy=False) - new_data = func(left.values.T, right.values).T - return self._constructor(new_data, - index=left.index, columns=self.columns, - copy=False) + assert left.index.equals(right.index) + + if left._is_mixed_type or right._is_mixed_type: + # operate column-wise; avoid costly object-casting in `.values` + return ops.dispatch_to_series(left, right, func) + else: + # fastpath --> operate directly on values + new_data = func(left.values.T, right.values).T + return self._constructor(new_data, + index=left.index, columns=self.columns, + copy=False) def _combine_match_columns(self, other, func, level=None, try_cast=True): + assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=1, level=level, copy=False) + assert left.columns.equals(right.index) new_data = left._data.eval(func=func, other=right, axes=[left.columns, self.index], @@ -4936,6 +4946,11 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): return self._constructor(new_data) def _combine_const(self, other, func, errors='raise', try_cast=True): + + if isinstance(other, DataFrame) and other._indexed_same(self): + assert False + return ops.dispatch_to_series(self, other, func) + new_data = self._data.eval(func=func, other=other, errors=errors, try_cast=try_cast) @@ -4943,7 +4958,7 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): def _compare_frame(self, other, func, str_rep): # compare_frame assumes self._indexed_same(other) - + return ops.dispatch_to_series(self, other, func, str_rep) import pandas.core.computation.expressions as expressions def _compare(a, b): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index dc139a8e14f66..cb65792827e2e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1313,6 +1313,7 @@ def _comp_method_SERIES(cls, op, special): """ op_name = _get_op_name(op, special) masker = _gen_eval_kwargs(op_name).get('masker', False) + str_rep = _get_opstr(op, cls) def na_op(x, y): # TODO: @@ -1577,7 +1578,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame -def dispatch_to_series(left, right, func): +def dispatch_to_series(left, right, func, str_rep=None): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -1587,6 +1588,7 @@ def dispatch_to_series(left, right, func): left : DataFrame right : scalar or DataFrame func : arithmetic or comparison operator + str_rep : str or None, default None Returns ------- @@ -1594,16 +1596,37 @@ def dispatch_to_series(left, right, func): """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. + import pandas.core.computation.expressions as expressions + if lib.is_scalar(right): - new_data = {i: func(left.iloc[:, i], right) - for i in range(len(left.columns))} + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) + for i in range(len(a.columns))} + + #new_data = {i: func(left.iloc[:, i], right) + # for i in range(len(left.columns))} elif isinstance(right, ABCDataFrame): assert right._indexed_same(left) - new_data = {i: func(left.iloc[:, i], right.iloc[:, i]) - for i in range(len(left.columns))} + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[:, i]) + for i in range(len(a.columns))} + + #new_data = {i: func(left.iloc[:, i], right.iloc[:, i]) + # for i in range(len(left.columns))} + elif isinstance(right, ABCSeries): + assert right.index.equals(left.index) # Handle other cases later + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) + for i in range(len(a.columns))} + else: # Remaining cases have less-obvious dispatch rules - raise NotImplementedError + raise NotImplementedError(right) + + new_data = expressions.evaluate(column_op, str_rep, left, right) result = left._constructor(new_data, index=left.index, copy=False) # Pin columns instead of passing to constructor for compat with From e9748a06a7846cf1010367713801297da449a9ba Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 11:52:21 -0700 Subject: [PATCH 3/4] remove commented-out test code --- asv_bench/asv.conf.json | 2 +- pandas/core/frame.py | 20 -------------------- pandas/core/ops.py | 8 ++------ 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 61cad8a292072..9c333f62810f4 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["2.7"], + "pythons": ["3.6"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 535355b457368..a190b116d081d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4946,31 +4946,11 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): return self._constructor(new_data) def _combine_const(self, other, func, errors='raise', try_cast=True): - - if isinstance(other, DataFrame) and other._indexed_same(self): - assert False - return ops.dispatch_to_series(self, other, func) - new_data = self._data.eval(func=func, other=other, errors=errors, try_cast=try_cast) return self._constructor(new_data) - def _compare_frame(self, other, func, str_rep): - # compare_frame assumes self._indexed_same(other) - return ops.dispatch_to_series(self, other, func, str_rep) - import pandas.core.computation.expressions as expressions - - def _compare(a, b): - return {i: func(a.iloc[:, i], b.iloc[:, i]) - for i in range(len(a.columns))} - - new_data = expressions.evaluate(_compare, str_rep, self, other) - result = self._constructor(data=new_data, index=self.index, - copy=False) - result.columns = self.columns - return result - def combine(self, other, func, fill_value=None, overwrite=True): """ Perform column-wise combine with another DataFrame based on a diff --git a/pandas/core/ops.py b/pandas/core/ops.py index cb65792827e2e..39aa6e7396a0d 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1604,8 +1604,6 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} - #new_data = {i: func(left.iloc[:, i], right) - # for i in range(len(left.columns))} elif isinstance(right, ABCDataFrame): assert right._indexed_same(left) @@ -1613,8 +1611,6 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} - #new_data = {i: func(left.iloc[:, i], right.iloc[:, i]) - # for i in range(len(left.columns))} elif isinstance(right, ABCSeries): assert right.index.equals(left.index) # Handle other cases later @@ -1795,7 +1791,7 @@ def f(self, other, axis=default_axis, level=None): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) - return self._compare_frame(other, na_op, str_rep) + return dispatch_to_series(self, other, na_op, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, @@ -1820,7 +1816,7 @@ def f(self, other): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - return self._compare_frame(other, func, str_rep) + return dispatch_to_series(self, other, func, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, From fa12cf7cad4a818cee335395d89c04b19d6a4829 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 14:11:49 -0700 Subject: [PATCH 4/4] fixup revert unused --- pandas/core/ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 39aa6e7396a0d..359b05394d1af 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1313,7 +1313,6 @@ def _comp_method_SERIES(cls, op, special): """ op_name = _get_op_name(op, special) masker = _gen_eval_kwargs(op_name).get('masker', False) - str_rep = _get_opstr(op, cls) def na_op(x, y): # TODO: