diff --git a/RELEASE.rst b/RELEASE.rst index 57cb53c1096f6..ebd88091050f1 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -250,6 +250,8 @@ pandas 0.11.1 not converting dtypes (GH3911_) - Fixed a bug where ``DataFrame.replace`` with a compiled regular expression in the ``to_replace`` argument wasn't working (GH3907_) + - Fixed ``__truediv__`` in Python 2.7 with ``numexpr`` installed to actually do true division when dividing + two integer arrays with at least 10000 cells total (GH3764_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -351,6 +353,7 @@ pandas 0.11.1 .. _GH3907: https://github.com/pydata/pandas/issues/3907 .. _GH3911: https://github.com/pydata/pandas/issues/3911 .. _GH3912: https://github.com/pydata/pandas/issues/3912 +.. _GH3764: https://github.com/pydata/pandas/issues/3764 pandas 0.11.0 ============= diff --git a/pandas/core/expressions.py b/pandas/core/expressions.py index 34e56fe576a07..abe891b82410c 100644 --- a/pandas/core/expressions.py +++ b/pandas/core/expressions.py @@ -51,7 +51,7 @@ def set_numexpr_threads(n = None): pass -def _evaluate_standard(op, op_str, a, b, raise_on_error=True): +def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): """ standard evaluation """ return op(a,b) @@ -79,7 +79,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False): +def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): result = None if _can_use_numexpr(op, op_str, a, b, 'evaluate'): @@ -92,7 +92,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False): result = ne.evaluate('a_value %s b_value' % op_str, local_dict={ 'a_value' : a_value, 'b_value' : b_value }, - casting='safe') + casting='safe', **eval_kwargs) except (ValueError), detail: if 'unknown type object' in str(detail): pass @@ -142,7 +142,7 @@ def _where_numexpr(cond, a, b, raise_on_error = False): # turn myself on set_use_numexpr(True) -def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True): +def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs): """ evaluate and return the expression of the op on a and b Parameters @@ -158,7 +158,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True): """ if use_numexpr: - return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error) + return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs) return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) def where(cond, a, b, raise_on_error=False, use_numexpr=True): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f0145364363ac..47142daa8b20b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -190,10 +190,10 @@ class DataConflictError(Exception): # Factory helper methods -def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None): +def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None, **eval_kwargs): def na_op(x, y): try: - result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True) + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) result = com._fill_zeros(result,y,fill_zeros) except TypeError: @@ -853,12 +853,17 @@ def __contains__(self, key): __sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None) __mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None) __truediv__ = _arith_method(operator.truediv, '__truediv__', '/', - default_axis=None, fill_zeros=np.inf) + default_axis=None, fill_zeros=np.inf, truediv=True) + # numexpr produces a different value (python/numpy: 0.000, numexpr: inf) + # when dividing by zero, so can't use floordiv speed up (yet) + # __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//', __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', default_axis=None, fill_zeros=np.inf) __pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None) - __mod__ = _arith_method(operator.mod, '__mod__', '*', default_axis=None, fill_zeros=np.nan) + # currently causes a floating point exception to occur - so sticking with unaccelerated for now + # __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan) + __mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan) __radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None) __rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None) @@ -879,7 +884,7 @@ def __contains__(self, key): # Python 2 division methods if not py3compat.PY3: __div__ = _arith_method(operator.div, '__div__', '/', - default_axis=None, fill_zeros=np.inf) + default_axis=None, fill_zeros=np.inf, truediv=False) __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', default_axis=None, fill_zeros=np.inf) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index af7f20a65fa7c..ba0a9926dfa78 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -30,6 +30,7 @@ _frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64') _mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') }) _mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') }) +_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64') class TestExpressions(unittest.TestCase): @@ -41,7 +42,56 @@ def setUp(self): self.frame2 = _frame2.copy() self.mixed = _mixed.copy() self.mixed2 = _mixed2.copy() - + self.integer = _integer.copy() + self._MIN_ELEMENTS = expr._MIN_ELEMENTS + + def tearDown(self): + expr._MIN_ELEMENTS = self._MIN_ELEMENTS + + #TODO: add test for Panel + #TODO: add tests for binary operations + @nose.tools.nottest + def run_arithmetic_test(self, df, assert_func, check_dtype=False): + expr._MIN_ELEMENTS = 0 + operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] + if not py3compat.PY3: + operations.append('div') + for arith in operations: + op = getattr(operator, arith) + expr.set_use_numexpr(False) + expected = op(df, df) + expr.set_use_numexpr(True) + result = op(df, df) + try: + if check_dtype: + if arith == 'div': + assert expected.dtype.kind == df.dtype.kind + if arith == 'truediv': + assert expected.dtype.kind == 'f' + assert_func(expected, result) + except Exception: + print("Failed test with operator %r" % op.__name__) + raise + + def test_integer_arithmetic(self): + self.run_arithmetic_test(self.integer, assert_frame_equal) + self.run_arithmetic_test(self.integer.icol(0), assert_series_equal, + check_dtype=True) + + def test_float_arithemtic(self): + self.run_arithmetic_test(self.frame, assert_frame_equal) + self.run_arithmetic_test(self.frame.icol(0), assert_series_equal, + check_dtype=True) + + def test_mixed_arithmetic(self): + self.run_arithmetic_test(self.mixed, assert_frame_equal) + for col in self.mixed.columns: + self.run_arithmetic_test(self.mixed[col], assert_series_equal) + + def test_integer_with_zeros(self): + self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) + self.run_arithmetic_test(self.integer, assert_frame_equal) + self.run_arithmetic_test(self.integer.icol(0), assert_series_equal) def test_invalid(self):