Skip to content

Commit

Permalink
Merge pull request #3764 from jtratner/fix_division_with_numexpr
Browse files Browse the repository at this point in the history
BUG: Fix __truediv__ numexpr error
  • Loading branch information
jreback committed Jun 18, 2013
2 parents fad50af + 0e7781c commit da14c6e
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 11 deletions.
3 changes: 3 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ pandas 0.11.1
not converting dtypes (GH3911_)
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
in the ``to_replace`` argument wasn't working (GH3907_)
- Fixed ``__truediv__`` in Python 2.7 with ``numexpr`` installed so that it performs true division (rather than
integer division) when dividing two integer arrays with at least 10000 cells total (GH3764_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
Expand Down Expand Up @@ -351,6 +353,7 @@ pandas 0.11.1
.. _GH3907: https://github.com/pydata/pandas/issues/3907
.. _GH3911: https://github.com/pydata/pandas/issues/3911
.. _GH3912: https://github.com/pydata/pandas/issues/3912
.. _GH3764: https://github.com/pydata/pandas/issues/3764

pandas 0.11.0
=============
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def set_numexpr_threads(n = None):
pass


def _evaluate_standard(op, op_str, a, b, raise_on_error=True):
def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs):
""" standard evaluation """
return op(a,b)

Expand Down Expand Up @@ -79,7 +79,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):

return False

def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False):
def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs):
result = None

if _can_use_numexpr(op, op_str, a, b, 'evaluate'):
Expand All @@ -92,7 +92,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False):
result = ne.evaluate('a_value %s b_value' % op_str,
local_dict={ 'a_value' : a_value,
'b_value' : b_value },
casting='safe')
casting='safe', **eval_kwargs)
except (ValueError), detail:
if 'unknown type object' in str(detail):
pass
Expand Down Expand Up @@ -142,7 +142,7 @@ def _where_numexpr(cond, a, b, raise_on_error = False):
# turn myself on
set_use_numexpr(True)

def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True):
def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs):
""" evaluate and return the expression of the op on a and b
Parameters
Expand All @@ -158,7 +158,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True):
"""

if use_numexpr:
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error)
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs)
return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)

def where(cond, a, b, raise_on_error=False, use_numexpr=True):
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ class DataConflictError(Exception):
# Factory helper methods


def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None):
def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None, **eval_kwargs):
def na_op(x, y):
try:
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True)
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
result = com._fill_zeros(result,y,fill_zeros)

except TypeError:
Expand Down Expand Up @@ -853,12 +853,17 @@ def __contains__(self, key):
__sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None)
__mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None)
__truediv__ = _arith_method(operator.truediv, '__truediv__', '/',
default_axis=None, fill_zeros=np.inf)
default_axis=None, fill_zeros=np.inf, truediv=True)
# numexpr produces a different value (python/numpy: 0.000, numexpr: inf)
# when dividing by zero, so can't use floordiv speed up (yet)
# __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//',
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__',
default_axis=None, fill_zeros=np.inf)
__pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None)

__mod__ = _arith_method(operator.mod, '__mod__', '*', default_axis=None, fill_zeros=np.nan)
# currently causes a floating point exception to occur - so sticking with unaccelerated for now
# __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan)
__mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan)

__radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None)
__rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None)
Expand All @@ -879,7 +884,7 @@ def __contains__(self, key):
# Python 2 division methods
if not py3compat.PY3:
__div__ = _arith_method(operator.div, '__div__', '/',
default_axis=None, fill_zeros=np.inf)
default_axis=None, fill_zeros=np.inf, truediv=False)
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__',
default_axis=None, fill_zeros=np.inf)

Expand Down
52 changes: 51 additions & 1 deletion pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
_frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64')
_mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') })
_mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') })
_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64')

class TestExpressions(unittest.TestCase):

Expand All @@ -41,7 +42,56 @@ def setUp(self):
self.frame2 = _frame2.copy()
self.mixed = _mixed.copy()
self.mixed2 = _mixed2.copy()

self.integer = _integer.copy()
self._MIN_ELEMENTS = expr._MIN_ELEMENTS

def tearDown(self):
    """Put back the ``expr._MIN_ELEMENTS`` value that setUp stashed on self."""
    saved_threshold = self._MIN_ELEMENTS
    expr._MIN_ELEMENTS = saved_threshold

#TODO: add test for Panel
#TODO: add tests for binary operations
@nose.tools.nottest
def run_arithmetic_test(self, df, assert_func, check_dtype=False):
    """Compare numexpr-enabled arithmetic against the numexpr-disabled path.

    For every binary operator under test, ``op(df, df)`` is evaluated once
    with numexpr switched off (the expected value) and once with it
    switched on (the result); the two are then handed to ``assert_func``.

    Parameters
    ----------
    df : object used as both operands of each operator
    assert_func : callable invoked as ``assert_func(expected, result)``
    check_dtype : bool, default False
        When True, additionally assert on ``dtype.kind`` of both the
        expected value and the numexpr result for ``div`` and ``truediv``.
        ``df`` must expose a single ``dtype`` attribute in that case.

    Raises
    ------
    Whatever the operator or the assertions raise; the operator name is
    printed before the exception is re-raised.
    """
    # Set to 0 for the duration of the test; tearDown restores the saved
    # value.  NOTE(review): presumably this is the size cutoff below which
    # numexpr is bypassed -- confirm against pandas/core/expressions.py.
    expr._MIN_ELEMENTS = 0
    operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow']
    if not py3compat.PY3:
        # operator.div only exists on Python 2
        operations.append('div')
    for arith in operations:
        op = getattr(operator, arith)
        try:
            expr.set_use_numexpr(False)
            try:
                expected = op(df, df)
            finally:
                # Always switch numexpr back on, even if the plain
                # evaluation raised, so later tests are not affected.
                expr.set_use_numexpr(True)
            result = op(df, df)
            if check_dtype:
                # Check the numexpr result too, not just the reference
                # value: the accelerated path is what is under test.
                if arith == 'div':
                    assert expected.dtype.kind == df.dtype.kind
                    assert result.dtype.kind == df.dtype.kind
                if arith == 'truediv':
                    assert expected.dtype.kind == 'f'
                    assert result.dtype.kind == 'f'
            assert_func(expected, result)
        except Exception:
            print("Failed test with operator %r" % op.__name__)
            raise

def test_integer_arithmetic(self):
    """Run the arithmetic comparison on the integer frame and its first column."""
    int_frame = self.integer
    self.run_arithmetic_test(int_frame, assert_frame_equal)
    # The single-column case also exercises the dtype assertions.
    first_column = int_frame.icol(0)
    self.run_arithmetic_test(first_column, assert_series_equal,
                             check_dtype=True)

def test_float_arithemtic(self):
    """Run the arithmetic comparison on ``self.frame`` and its first column."""
    float_frame = self.frame
    self.run_arithmetic_test(float_frame, assert_frame_equal)
    # The single-column case also exercises the dtype assertions.
    first_column = float_frame.icol(0)
    self.run_arithmetic_test(first_column, assert_series_equal,
                             check_dtype=True)

def test_mixed_arithmetic(self):
    """Run the arithmetic comparison on the mixed-dtype frame, then on each column."""
    mixed_frame = self.mixed
    self.run_arithmetic_test(mixed_frame, assert_frame_equal)
    for column_name in mixed_frame.columns:
        column = mixed_frame[column_name]
        self.run_arithmetic_test(column, assert_series_equal)

def test_integer_with_zeros(self):
    """Run the arithmetic comparison on integer data that contains zeros."""
    # Multiply by a random 0/1 array of the same shape so that some
    # entries become zero.
    zero_one_mask = np.random.randint(0, 2, size=np.shape(self.integer))
    self.integer *= zero_one_mask
    zeroed = self.integer
    self.run_arithmetic_test(zeroed, assert_frame_equal)
    self.run_arithmetic_test(zeroed.icol(0), assert_series_equal)

def test_invalid(self):

Expand Down

0 comments on commit da14c6e

Please sign in to comment.