From 7f2aa8f46a4a36937b1be8ec2498c4ff2dd4cf34 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 25 Jan 2023 09:20:33 -0800 Subject: [PATCH] ENH: pd.NA comparison with time, date, timedelta (#50901) * ENH: pd.NA comparison with time, date, timedelta * mypy fixup * fix on nullable dtypes --- pandas/_libs/missing.pyx | 9 +++++++++ pandas/tests/extension/base/methods.py | 13 +++++++++++-- pandas/tests/extension/test_arrow.py | 6 +----- pandas/tests/extension/test_boolean.py | 19 ++----------------- pandas/tests/extension/test_floating.py | 2 +- pandas/tests/extension/test_integer.py | 2 +- pandas/tests/extension/test_sparse.py | 23 +---------------------- pandas/tests/scalar/test_na_scalar.py | 21 ++++++++++++++++++++- 8 files changed, 46 insertions(+), 49 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index fc94d221a63b9..e6516b004a973 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -3,6 +3,11 @@ import numbers from sys import maxsize cimport cython +from cpython.datetime cimport ( + date, + time, + timedelta, +) from cython cimport Py_ssize_t import numpy as np @@ -307,6 +312,7 @@ def is_numeric_na(values: ndarray) -> ndarray: def _create_binary_propagating_op(name, is_divmod=False): + is_cmp = name.strip("_") in ["eq", "ne", "le", "lt", "ge", "gt"] def method(self, other): if (other is C_NA or isinstance(other, (str, bytes)) @@ -329,6 +335,9 @@ def _create_binary_propagating_op(name, is_divmod=False): else: return out + elif is_cmp and isinstance(other, (date, time, timedelta)): + return NA + return NotImplemented method.__name__ = name diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 2df410dff2b00..54d41fa9d972a 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._typing import Dtype + from pandas.core.dtypes.common import is_bool_dtype from 
pandas.core.dtypes.missing import na_value_for_dtype @@ -260,6 +262,9 @@ def test_fillna_length_mismatch(self, data_missing): with pytest.raises(ValueError, match=msg): data_missing.fillna(data_missing.take([1])) + # Subclasses can override if we expect e.g. Sparse[bool], boolean, bool[pyarrow] + _combine_le_expected_dtype: Dtype = np.dtype(bool) + def test_combine_le(self, data_repeated): # GH 20825 # Test that combine works when doing a <= (le) comparison @@ -268,13 +273,17 @@ def test_combine_le(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))] + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + dtype=self._combine_le_expected_dtype, ) self.assert_series_equal(result, expected) val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= val for a in list(orig_data1)]) + expected = pd.Series( + [a <= val for a in list(orig_data1)], + dtype=self._combine_le_expected_dtype, + ) self.assert_series_equal(result, expected) def test_combine_add(self, data_repeated): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2467471e3643e..eef77ceabb6fe 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -972,11 +972,7 @@ def test_factorize(self, data_for_grouping, request): ) super().test_factorize(data_for_grouping) - @pytest.mark.xfail( - reason="result dtype pyarrow[bool] better than expected dtype object" - ) - def test_combine_le(self, data_repeated): - super().test_combine_le(data_repeated) + _combine_le_expected_dtype = "bool[pyarrow]" def test_combine_add(self, data_repeated, request): pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index b611701e4e429..8c8cbfa5200b1 100644 --- 
a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -176,6 +176,8 @@ class TestReshaping(base.BaseReshapingTests): class TestMethods(base.BaseMethodsTests): + _combine_le_expected_dtype = "boolean" + def test_factorize(self, data_for_grouping): # override because we only have 2 unique values labels, uniques = pd.factorize(data_for_grouping, use_na_sentinel=True) @@ -185,23 +187,6 @@ def test_factorize(self, data_for_grouping): tm.assert_numpy_array_equal(labels, expected_labels) self.assert_extension_array_equal(uniques, expected_uniques) - def test_combine_le(self, data_repeated): - # override because expected needs to be boolean instead of bool dtype - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], - dtype="boolean", - ) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean") - self.assert_series_equal(result, expected) - def test_searchsorted(self, data_for_sorting, as_series): # override because we only have 2 unique values data_for_sorting = pd.array([True, False], dtype="boolean") diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index f67f7dc56d26f..60c78b46a4832 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -173,7 +173,7 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - pass + _combine_le_expected_dtype = object # TODO: can we make this boolean? 
class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 788a0bf46afc5..936764c3627d0 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -201,7 +201,7 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - pass + _combine_le_expected_dtype = object # TODO: can we make this boolean? class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 9c5b3426246a8..745911871694c 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -270,28 +270,7 @@ def test_fillna_frame(self, data_missing): class TestMethods(BaseSparseTests, base.BaseMethodsTests): - def test_combine_le(self, data_repeated): - # We return a Series[SparseArray].__le__ returns a - # Series[Sparse[bool]] - # rather than Series[bool] - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series( - SparseArray( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], - fill_value=False, - ) - ) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 <= x2) - expected = pd.Series( - SparseArray([a <= val for a in list(orig_data1)], fill_value=False) - ) - self.assert_series_equal(result, expected) + _combine_le_expected_dtype = "Sparse[bool]" def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index a77316cbc0ea6..bdeb11dbb8f19 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -1,3 +1,8 @@ +from datetime import ( + date, + time, + timedelta, +) import pickle import numpy as np @@ -67,7 
+72,21 @@ def test_arithmetic_ops(all_arithmetic_functions, other): @pytest.mark.parametrize( - "other", [NA, 1, 1.0, "a", b"a", np.int64(1), np.nan, np.bool_(True)] + "other", + [ + NA, + 1, + 1.0, + "a", + b"a", + np.int64(1), + np.nan, + np.bool_(True), + time(0), + date(1, 2, 3), + timedelta(1), + pd.NaT, + ], ) def test_comparison_ops(comparison_op, other): assert comparison_op(NA, other) is NA