diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 18f1dac3c13b2..a6c47f3192175 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -1,302 +1,355 @@ +import operator + import numpy as np import pytest import pandas as pd import pandas._testing as tm -from pandas.api.types import is_float, is_float_dtype, is_scalar -from pandas.core.arrays import IntegerArray, integer_array -from pandas.tests.extension.base import BaseOpsUtil - - -class TestArithmeticOps(BaseOpsUtil): - def _check_divmod_op(self, s, op, other, exc=None): - super()._check_divmod_op(s, op, other, None) - - def _check_op(self, s, op_name, other, exc=None): - op = self.get_op_from_name(op_name) - result = op(s, other) - - # compute expected - mask = s.isna() - - # if s is a DataFrame, squeeze to a Series - # for comparison - if isinstance(s, pd.DataFrame): - result = result.squeeze() - s = s.squeeze() - mask = mask.squeeze() - - # other array is an Integer - if isinstance(other, IntegerArray): - omask = getattr(other, "mask", None) - mask = getattr(other, "data", other) - if omask is not None: - mask |= omask - - # 1 ** na is na, so need to unmask those - if op_name == "__pow__": - mask = np.where(~s.isna() & (s == 1), False, mask) - - elif op_name == "__rpow__": - other_is_one = other == 1 - if isinstance(other_is_one, pd.Series): - other_is_one = other_is_one.fillna(False) - mask = np.where(other_is_one, False, mask) - - # float result type or float op - if ( - is_float_dtype(other) - or is_float(other) - or op_name in ["__rtruediv__", "__truediv__", "__rdiv__", "__div__"] - ): - rs = s.astype("float") - expected = op(rs, other) - self._check_op_float(result, expected, mask, s, op_name, other) - - # integer result type +from pandas.core.arrays import ExtensionArray, integer_array +import pandas.core.ops as ops + + +# TODO need to use existing utility function or move this somewhere central +def get_op_from_name(op_name): + short_opname = op_name.strip("_") + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + +# Basic test for the arithmetic array ops +# ----------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "opname, exp", + [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])], + ids=["add", "mul"], +) +def test_add_mul(dtype, opname, exp): + a = pd.array([0, 1, None, 3, 4], dtype=dtype) + b = pd.array([1, 2, 3, None, 5], dtype=dtype) + + # array / array + expected = pd.array(exp, dtype=dtype) + + op = getattr(operator, opname) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + + op = getattr(ops, "r" + opname) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + + +def test_sub(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a - b + expected = pd.array([1, 1, None, None, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_div(dtype): + # for now division gives a float numpy array + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a / b + expected = np.array([np.inf, 2, np.nan, np.nan, 1.25], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) +def test_divide_by_zero(zero, negative): + # https://github.com/pandas-dev/pandas/issues/27398 + a = pd.array([0, 1, -1, None], dtype="Int64") + result = a / zero + expected = np.array([np.nan, np.inf, -np.inf, np.nan]) + if negative: + expected *= -1 + tm.assert_numpy_array_equal(result, expected) + + +def test_floordiv(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a // b + # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet) + expected = pd.array([0, 2, None, None, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_mod(dtype): + a = pd.array([1, 2, 3, None, 5], dtype=dtype) + b = pd.array([0, 1, None, 3, 4], dtype=dtype) + + result = a % b + expected = pd.array([0, 0, None, None, 1], dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_pow_scalar(): + a = pd.array([-1, 0, 1, None, 2], dtype="Int64") + result = a ** 0 + expected = pd.array([1, 1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** 1 + expected = pd.array([-1, 0, 1, None, 2], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** pd.NA + expected = pd.array([None, None, 1, None, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** np.nan + expected = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # reversed + a = a[1:] # Can't raise integers to negative powers. + + result = 0 ** a + expected = pd.array([1, 0, None, 0], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = 1 ** a + expected = pd.array([1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = pd.NA ** a + expected = pd.array([1, None, None, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = np.nan ** a + expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + +def test_pow_array(): + a = integer_array([0, 0, 0, 1, 1, 1, None, None, None]) + b = integer_array([0, 1, None, 0, 1, None, 0, 1, None]) + result = a ** b + expected = integer_array([1, 0, None, 1, 1, 1, 1, None, None]) + tm.assert_extension_array_equal(result, expected) + + +def test_rpow_one_to_na(): + # https://github.com/pandas-dev/pandas/issues/22022 + # https://github.com/pandas-dev/pandas/issues/29997 + arr = integer_array([np.nan, np.nan]) + result = np.array([1.0, 2.0]) ** arr + expected = np.array([1.0, np.nan]) + tm.assert_numpy_array_equal(result, expected) + + +# Test equivalence of scalars, numpy arrays with array ops +# ----------------------------------------------------------------------------- + + +def test_array_scalar_like_equivalence(data, all_arithmetic_operators): + op = get_op_from_name(all_arithmetic_operators) + + scalar = 2 + scalar_array = pd.array([2] * len(data), dtype=data.dtype) + + # TODO also add len-1 array (np.array([2], dtype=data.dtype.numpy_dtype)) + for scalar in [2, data.dtype.type(2)]: + result = op(data, scalar) + expected = op(data, scalar_array) + if isinstance(expected, ExtensionArray): + tm.assert_extension_array_equal(result, expected) else: - rs = pd.Series(s.values._data, name=s.name) - expected = op(rs, other) - self._check_op_integer(result, expected, mask, s, op_name, other) - - def _check_op_float(self, result, expected, mask, s, op_name, other): - # check comparisons that are resulting in float dtypes - - expected[mask] = np.nan - if "floordiv" in op_name: - # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet) - mask2 = np.isinf(expected) & np.isnan(result) - expected[mask2] = np.nan - tm.assert_series_equal(result, expected) - - def _check_op_integer(self, result, expected, mask, s, op_name, other): - # check comparisons that are resulting in integer dtypes - - # to compare properly, we convert the expected - # to float, mask to nans and convert infs - # if we have uints then we process as uints - # then convert to float - # and we ultimately want to create a IntArray - # for comparisons - - fill_value = 0 - - # mod/rmod turn floating 0 into NaN while - # integer works as expected (no nan) - if op_name in ["__mod__", "__rmod__"]: - if is_scalar(other): - if other == 0: - expected[s.values == 0] = 0 - else: - expected = expected.fillna(0) - else: - expected[ - (s.values == 0).fillna(False) - & ((expected == 0).fillna(False) | expected.isna()) - ] = 0 - try: - expected[ - ((expected == np.inf) | (expected == -np.inf)).fillna(False) - ] = fill_value - original = expected - expected = expected.astype(s.dtype) - - except ValueError: - - expected = expected.astype(float) - expected[ - ((expected == np.inf) | (expected == -np.inf)).fillna(False) - ] = fill_value - original = expected - expected = expected.astype(s.dtype) - - expected[mask] = pd.NA - - # assert that the expected astype is ok - # (skip for unsigned as they have wrap around) - if not s.dtype.is_unsigned_integer: - original = pd.Series(original) - - # we need to fill with 0's to emulate what an astype('int') does - # (truncation) for certain ops - if op_name in ["__rtruediv__", "__rdiv__"]: - mask |= original.isna() - original = original.fillna(0).astype("int") - - original = original.astype("float") - original[mask] = np.nan - tm.assert_series_equal(original, expected.astype("float")) - - # assert our expected result - tm.assert_series_equal(result, expected) - - def test_arith_integer_array(self, data, all_arithmetic_operators): - # we operate with a rhs of an integer array - - op = all_arithmetic_operators + # TODO div still gives float ndarray -> remove this once we have Float EA + tm.assert_numpy_array_equal(result, expected) - s = pd.Series(data) - rhs = pd.Series([1] * len(data), dtype=data.dtype) - rhs.iloc[-1] = np.nan - self._check_op(s, op, rhs) +def test_array_NA(data, all_arithmetic_operators): + if "truediv" in all_arithmetic_operators: + pytest.skip("division with pd.NA raises") + op = get_op_from_name(all_arithmetic_operators) - def test_arith_series_with_scalar(self, data, all_arithmetic_operators): - # scalar - op = all_arithmetic_operators - s = pd.Series(data) - self._check_op(s, op, 1, exc=TypeError) + scalar = pd.NA + scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype) - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): - # frame & scalar - op = all_arithmetic_operators - df = pd.DataFrame({"A": data}) - self._check_op(df, op, 1, exc=TypeError) + result = op(data, scalar) + expected = op(data, scalar_array) + tm.assert_extension_array_equal(result, expected) - def test_arith_series_with_array(self, data, all_arithmetic_operators): - # ndarray & other series - op = all_arithmetic_operators - s = pd.Series(data) - other = np.ones(len(s), dtype=s.dtype.type) - self._check_op(s, op, other, exc=TypeError) - def test_arith_coerce_scalar(self, data, all_arithmetic_operators): +def test_numpy_array_equivalence(data, all_arithmetic_operators): + op = get_op_from_name(all_arithmetic_operators) - op = all_arithmetic_operators - s = pd.Series(data) + numpy_array = np.array([2] * len(data), dtype=data.dtype.numpy_dtype) + pd_array = pd.array(numpy_array, dtype=data.dtype) + + result = op(data, numpy_array) + expected = op(data, pd_array) + if isinstance(expected, ExtensionArray): + tm.assert_extension_array_equal(result, expected) + else: + # TODO div still gives float ndarray -> remove this once we have Float EA + tm.assert_numpy_array_equal(result, expected) - other = 0.01 - self._check_op(s, op, other) - @pytest.mark.parametrize("other", [1.0, np.array(1.0)]) - def test_arithmetic_conversion(self, all_arithmetic_operators, other): - # if we have a float operand we should have a float result - # if that is equal to an integer - op = self.get_op_from_name(all_arithmetic_operators) +@pytest.mark.parametrize("other", [0, 0.5]) +def test_numpy_zero_dim_ndarray(other): + arr = integer_array([1, None, 2]) + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) - s = pd.Series([1, 2, 3], dtype="Int64") - result = op(s, other) - assert result.dtype is np.dtype("float") - def test_arith_len_mismatch(self, all_arithmetic_operators): - # operating with a list-like with non-matching length raises - op = self.get_op_from_name(all_arithmetic_operators) - other = np.array([1.0]) +# Test equivalence with Series and DataFrame ops +# ----------------------------------------------------------------------------- - s = pd.Series([1, 2, 3], dtype="Int64") - with pytest.raises(ValueError, match="Lengths must match"): - op(s, other) - @pytest.mark.parametrize("other", [0, 0.5]) - def test_arith_zero_dim_ndarray(self, other): - arr = integer_array([1, None, 2]) - result = arr + np.array(other) - expected = arr + other - tm.assert_equal(result, expected) +def test_frame(data, all_arithmetic_operators): + op = get_op_from_name(all_arithmetic_operators) - def test_error(self, data, all_arithmetic_operators): - # invalid ops + # DataFrame with scalar + df = pd.DataFrame({"A": data}) + scalar = 2 - op = all_arithmetic_operators - s = pd.Series(data) - ops = getattr(s, op) - opa = getattr(data, op) + result = op(df, scalar) + expected = pd.DataFrame({"A": op(data, scalar)}) + tm.assert_frame_equal(result, expected) + + +def test_series(data, all_arithmetic_operators): + op = get_op_from_name(all_arithmetic_operators) + + s = pd.Series(data) + + # Series with scalar + scalar = 2 + result = op(s, scalar) + expected = pd.Series(op(data, scalar)) + tm.assert_series_equal(result, expected) + + # Series with np.ndarray + other = np.ones(len(data), dtype=data.dtype.type) + result = op(s, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) - # invalid scalars + # Series with pd.array + other = pd.array(np.ones(len(data)), dtype=data.dtype) + result = op(s, other) + expected = pd.Series(op(data, other)) + tm.assert_series_equal(result, expected) + + # Series with Series + other = pd.Series(np.ones(len(data)), dtype=data.dtype) + result = op(s, other) + expected = pd.Series(op(data, other.array)) + tm.assert_series_equal(result, expected) + + +# Test generic charachteristics / errors +# ----------------------------------------------------------------------------- + + +def test_error_invalid_values(data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + + # invalid scalars + msg = ( + r"(:?can only perform ops with numeric values)" + r"|(:?IntegerArray cannot perform the operation mod)" + ) + with pytest.raises(TypeError, match=msg): + ops("foo") + with pytest.raises(TypeError, match=msg): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + with pytest.raises(TypeError, match=msg): + ops(pd.Series("foo", index=s.index)) + + if op != "__rpow__": + # TODO(extension) + # rpow with a datetimelike coerces the integer array incorrectly msg = ( - r"(:?can only perform ops with numeric values)" - r"|(:?IntegerArray cannot perform the operation mod)" + "can only perform ops with numeric values|" + "cannot perform .* with this index type: DatetimeArray|" + "Addition/subtraction of integers and integer-arrays " + "with DatetimeArray is no longer supported. *" ) with pytest.raises(TypeError, match=msg): - ops("foo") - with pytest.raises(TypeError, match=msg): - ops(pd.Timestamp("20180101")) + ops(pd.Series(pd.date_range("20180101", periods=len(s)))) - # invalid array-likes - with pytest.raises(TypeError, match=msg): - ops(pd.Series("foo", index=s.index)) - - if op != "__rpow__": - # TODO(extension) - # rpow with a datetimelike coerces the integer array incorrectly - msg = ( - "can only perform ops with numeric values|" - "cannot perform .* with this index type: DatetimeArray|" - "Addition/subtraction of integers and integer-arrays " - "with DatetimeArray is no longer supported. *" - ) - with pytest.raises(TypeError, match=msg): - ops(pd.Series(pd.date_range("20180101", periods=len(s)))) - - # 2d - result = opa(pd.DataFrame({"A": s})) - assert result is NotImplemented - - msg = r"can only perform ops with 1-d structures" - with pytest.raises(NotImplementedError, match=msg): - opa(np.arange(len(s)).reshape(-1, len(s))) - - @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) - def test_divide_by_zero(self, zero, negative): - # https://github.com/pandas-dev/pandas/issues/27398 - a = pd.array([0, 1, -1, None], dtype="Int64") - result = a / zero - expected = np.array([np.nan, np.inf, -np.inf, np.nan]) - if negative: - expected *= -1 - tm.assert_numpy_array_equal(result, expected) - def test_pow_scalar(self): - a = pd.array([-1, 0, 1, None, 2], dtype="Int64") - result = a ** 0 - expected = pd.array([1, 1, 1, 1, 1], dtype="Int64") - tm.assert_extension_array_equal(result, expected) +def test_error_invalid_object(data, all_arithmetic_operators): - result = a ** 1 - expected = pd.array([-1, 0, 1, None, 2], dtype="Int64") - tm.assert_extension_array_equal(result, expected) + op = all_arithmetic_operators + opa = getattr(data, op) - result = a ** pd.NA - expected = pd.array([None, None, 1, None, None], dtype="Int64") - tm.assert_extension_array_equal(result, expected) + # 2d -> return NotImplemented + result = opa(pd.DataFrame({"A": data})) + assert result is NotImplemented - result = a ** np.nan - expected = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64") - tm.assert_numpy_array_equal(result, expected) + msg = r"can only perform ops with 1-d structures" + with pytest.raises(NotImplementedError, match=msg): + opa(np.arange(len(data)).reshape(-1, len(data))) - # reversed - a = a[1:] # Can't raise integers to negative powers. - result = 0 ** a - expected = pd.array([1, 0, None, 0], dtype="Int64") - tm.assert_extension_array_equal(result, expected) +def test_error_len_mismatch(all_arithmetic_operators): + # operating with a list-like with non-matching length raises + op = get_op_from_name(all_arithmetic_operators) - result = 1 ** a - expected = pd.array([1, 1, 1, 1], dtype="Int64") - tm.assert_extension_array_equal(result, expected) + data = pd.array([1, 2, 3], dtype="Int64") - result = pd.NA ** a - expected = pd.array([1, None, None, None], dtype="Int64") - tm.assert_extension_array_equal(result, expected) + for other in [[1, 2], np.array([1.0, 2.0])]: + with pytest.raises(ValueError, match="Lengths must match"): + op(data, other) - result = np.nan ** a - expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64") - tm.assert_numpy_array_equal(result, expected) + s = pd.Series(data) + with pytest.raises(ValueError, match="Lengths must match"): + op(s, other) - def test_pow_array(self): - a = integer_array([0, 0, 0, 1, 1, 1, None, None, None]) - b = integer_array([0, 1, None, 0, 1, None, 0, 1, None]) - result = a ** b - expected = integer_array([1, 0, None, 1, 1, 1, 1, None, None]) - tm.assert_extension_array_equal(result, expected) - def test_rpow_one_to_na(self): - # https://github.com/pandas-dev/pandas/issues/22022 - # https://github.com/pandas-dev/pandas/issues/29997 - arr = integer_array([np.nan, np.nan]) - result = np.array([1.0, 2.0]) ** arr - expected = np.array([1.0, np.nan]) - tm.assert_numpy_array_equal(result, expected) +# Various +# ----------------------------------------------------------------------------- + + +# TODO test unsigned overflow + + +def test_arith_coerce_scalar(data, all_arithmetic_operators): + op = get_op_from_name(all_arithmetic_operators) + s = pd.Series(data) + other = 0.01 + + result = op(s, other) + expected = op(s.astype(float), other) + # rfloordiv results in nan instead of inf + if all_arithmetic_operators == "__rfloordiv__": + expected[(expected == np.inf) | (expected == -np.inf)] = np.nan + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("other", [1.0, np.array(1.0)]) +def test_arithmetic_conversion(all_arithmetic_operators, other): + # if we have a float operand we should have a float result + # if that is equal to an integer + op = get_op_from_name(all_arithmetic_operators) + + s = pd.Series([1, 2, 3], dtype="Int64") + result = op(s, other) + assert result.dtype is np.dtype("float") def test_cross_type_arithmetic():