Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix __truediv__ numexpr error #3764

Merged
merged 5 commits into from
Jun 18, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ pandas 0.11.1
not converting dtypes (GH3911_)
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
in the ``to_replace`` argument wasn't working (GH3907_)
- Fixed ``__truediv__`` under Python 2.7 with ``numexpr`` installed so that it actually performs true division
when dividing two integer arrays with at least 10000 cells in total (GH3764_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
Expand Down Expand Up @@ -351,6 +353,7 @@ pandas 0.11.1
.. _GH3907: https://github.com/pydata/pandas/issues/3907
.. _GH3911: https://github.com/pydata/pandas/issues/3911
.. _GH3912: https://github.com/pydata/pandas/issues/3912
.. _GH3764: https://github.com/pydata/pandas/issues/3764

pandas 0.11.0
=============
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def set_numexpr_threads(n = None):
pass


def _evaluate_standard(op, op_str, a, b, raise_on_error=True):
def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs):
""" standard evaluation """
return op(a,b)

Expand Down Expand Up @@ -79,7 +79,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):

return False

def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False):
def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs):
result = None

if _can_use_numexpr(op, op_str, a, b, 'evaluate'):
Expand All @@ -92,7 +92,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False):
result = ne.evaluate('a_value %s b_value' % op_str,
local_dict={ 'a_value' : a_value,
'b_value' : b_value },
casting='safe')
casting='safe', **eval_kwargs)
except (ValueError), detail:
if 'unknown type object' in str(detail):
pass
Expand Down Expand Up @@ -142,7 +142,7 @@ def _where_numexpr(cond, a, b, raise_on_error = False):
# turn myself on
set_use_numexpr(True)

def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True):
def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs):
""" evaluate and return the expression of the op on a and b

Parameters
Expand All @@ -158,7 +158,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True):
"""

if use_numexpr:
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error)
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs)
return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)

def where(cond, a, b, raise_on_error=False, use_numexpr=True):
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ class DataConflictError(Exception):
# Factory helper methods


def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None):
def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None, **eval_kwargs):
def na_op(x, y):
try:
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True)
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
result = com._fill_zeros(result,y,fill_zeros)

except TypeError:
Expand Down Expand Up @@ -853,12 +853,17 @@ def __contains__(self, key):
__sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None)
__mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None)
__truediv__ = _arith_method(operator.truediv, '__truediv__', '/',
default_axis=None, fill_zeros=np.inf)
default_axis=None, fill_zeros=np.inf, truediv=True)
# numexpr produces a different value (python/numpy: 0.000, numexpr: inf)
# when dividing by zero, so can't use floordiv speed up (yet)
# __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//',
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__',
default_axis=None, fill_zeros=np.inf)
__pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None)

__mod__ = _arith_method(operator.mod, '__mod__', '*', default_axis=None, fill_zeros=np.nan)
# currently causes a floating point exception to occur - so sticking with unaccelerated for now
# __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan)
__mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan)

__radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None)
__rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None)
Expand All @@ -879,7 +884,7 @@ def __contains__(self, key):
# Python 2 division methods
if not py3compat.PY3:
__div__ = _arith_method(operator.div, '__div__', '/',
default_axis=None, fill_zeros=np.inf)
default_axis=None, fill_zeros=np.inf, truediv=False)
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__',
default_axis=None, fill_zeros=np.inf)

Expand Down
52 changes: 51 additions & 1 deletion pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
_frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64')
_mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') })
_mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') })
_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64')

class TestExpressions(unittest.TestCase):

Expand All @@ -41,7 +42,56 @@ def setUp(self):
self.frame2 = _frame2.copy()
self.mixed = _mixed.copy()
self.mixed2 = _mixed2.copy()

self.integer = _integer.copy()
self._MIN_ELEMENTS = expr._MIN_ELEMENTS

def tearDown(self):
    """Restore ``expr._MIN_ELEMENTS`` to the value stored on this instance."""
    # run_arithmetic_test overwrites expr._MIN_ELEMENTS; undo that here so
    # the module-level setting does not leak into other tests.
    saved_min_elements = self._MIN_ELEMENTS
    expr._MIN_ELEMENTS = saved_min_elements

#TODO: add test for Panel
#TODO: add tests for binary operations
@nose.tools.nottest
def run_arithmetic_test(self, df, assert_func, check_dtype=False):
    """Check that each arithmetic operator gives the same answer with
    numexpr switched off and switched on.

    Parameters
    ----------
    df : object passed as both operands of every operator
    assert_func : callable taking ``(expected, result)``; should raise
        when the two differ
    check_dtype : bool, default False
        When True, additionally assert on the dtype kind of the
        non-numexpr result for ``div`` and ``truediv``.
    """
    # Presumably lowers the size threshold so small inputs also take the
    # numexpr path -- confirm against pandas/core/expressions.py.
    expr._MIN_ELEMENTS = 0

    op_names = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow']
    if not py3compat.PY3:
        # 'div' is only exercised when not running under Python 3
        op_names += ['div']

    for op_name in op_names:
        func = getattr(operator, op_name)

        # reference value computed with numexpr disabled
        expr.set_use_numexpr(False)
        baseline = func(df, df)

        # same operation again with numexpr enabled
        expr.set_use_numexpr(True)
        accelerated = func(df, df)

        try:
            if check_dtype and op_name == 'div':
                assert baseline.dtype.kind == df.dtype.kind
            if check_dtype and op_name == 'truediv':
                assert baseline.dtype.kind == 'f'
            assert_func(baseline, accelerated)
        except Exception:
            # report which operator failed, then let the error propagate
            print("Failed test with operator %r" % func.__name__)
            raise

def test_integer_arithmetic(self):
    """Run the arithmetic comparison on ``self.integer`` and on its
    column at position 0 (the latter with dtype checks enabled)."""
    self.run_arithmetic_test(self.integer, assert_frame_equal)
    first_column = self.integer.icol(0)
    self.run_arithmetic_test(first_column, assert_series_equal,
                             check_dtype=True)

def test_float_arithemtic(self):
    """Run the arithmetic comparison on ``self.frame`` and on its
    column at position 0 (the latter with dtype checks enabled)."""
    # NOTE(review): the method name misspells "arithmetic"; it is kept
    # unchanged here so the test id stays the same.
    self.run_arithmetic_test(self.frame, assert_frame_equal)
    first_column = self.frame.icol(0)
    self.run_arithmetic_test(first_column, assert_series_equal,
                             check_dtype=True)

def test_mixed_arithmetic(self):
    """Run the arithmetic comparison on ``self.mixed`` as a whole and
    then on each of its columns individually."""
    self.run_arithmetic_test(self.mixed, assert_frame_equal)
    for column_name in self.mixed.columns:
        single_column = self.mixed[column_name]
        self.run_arithmetic_test(single_column, assert_series_equal)

def test_integer_with_zeros(self):
    """Run the arithmetic comparison after multiplying ``self.integer``
    element-wise by random values drawn from ``randint(0, 2)``."""
    # randint(0, 2) yields 0 or 1, so a random subset of cells becomes
    # zero; this exercises division and modulo by zero.
    self.integer *= np.random.randint(0, 2, size=np.shape(self.integer))
    self.run_arithmetic_test(self.integer, assert_frame_equal)
    first_column = self.integer.icol(0)
    self.run_arithmetic_test(first_column, assert_series_equal)

def test_invalid(self):

Expand Down