From e237641e2b86e2ed386e5626e5d61913c05c7be9 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 20 Jan 2023 10:37:16 -0800 Subject: [PATCH 1/3] ENH: pd.NA comparison with time, date, timedelta --- pandas/_libs/missing.pyx | 9 +++++++++ pandas/tests/extension/base/methods.py | 11 +++++++++-- pandas/tests/extension/test_arrow.py | 6 +----- pandas/tests/extension/test_boolean.py | 19 ++----------------- pandas/tests/extension/test_sparse.py | 23 +---------------------- pandas/tests/scalar/test_na_scalar.py | 21 ++++++++++++++++++++- 6 files changed, 42 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index fc94d221a63b9..e6516b004a973 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -3,6 +3,11 @@ import numbers from sys import maxsize cimport cython +from cpython.datetime cimport ( + date, + time, + timedelta, +) from cython cimport Py_ssize_t import numpy as np @@ -307,6 +312,7 @@ def is_numeric_na(values: ndarray) -> ndarray: def _create_binary_propagating_op(name, is_divmod=False): + is_cmp = name.strip("_") in ["eq", "ne", "le", "lt", "ge", "gt"] def method(self, other): if (other is C_NA or isinstance(other, (str, bytes)) @@ -329,6 +335,9 @@ def _create_binary_propagating_op(name, is_divmod=False): else: return out + elif is_cmp and isinstance(other, (date, time, timedelta)): + return NA + return NotImplemented method.__name__ = name diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 2df410dff2b00..84f41718d62f5 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -260,6 +260,9 @@ def test_fillna_length_mismatch(self, data_missing): with pytest.raises(ValueError, match=msg): data_missing.fillna(data_missing.take([1])) + # Subclasses can override if we expect e.g Sparse[bool], boolean, pyarrow[bool] + _combine_le_expected_dtype = np.dtype(bool) + def test_combine_le(self, data_repeated): # GH 20825 # Test that combine works when doing a <= (le) comparison @@ -268,13 +271,17 @@ def test_combine_le(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))] + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + dtype=self._combine_le_expected_dtype, ) self.assert_series_equal(result, expected) val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= val for a in list(orig_data1)]) + expected = pd.Series( + [a <= val for a in list(orig_data1)], + dtype=self._combine_le_expected_dtype, + ) self.assert_series_equal(result, expected) def test_combine_add(self, data_repeated): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index a7c243cdfe74f..72a49f8f1082a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -949,11 +949,7 @@ def test_factorize(self, data_for_grouping, request): ) super().test_factorize(data_for_grouping) - @pytest.mark.xfail( - reason="result dtype pyarrow[bool] better than expected dtype object" - ) - def test_combine_le(self, data_repeated): - super().test_combine_le(data_repeated) + _combine_le_expected_dtype = "bool[pyarrow]" def test_combine_add(self, data_repeated, request): pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index b611701e4e429..8c8cbfa5200b1 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -176,6 +176,8 @@ class TestReshaping(base.BaseReshapingTests): class TestMethods(base.BaseMethodsTests): + _combine_le_expected_dtype = "boolean" + def test_factorize(self, data_for_grouping): # override because we only have 2 unique values labels, uniques = pd.factorize(data_for_grouping, use_na_sentinel=True) @@ -185,23 +187,6 @@ def test_factorize(self, data_for_grouping): tm.assert_numpy_array_equal(labels, expected_labels) self.assert_extension_array_equal(uniques, expected_uniques) - def test_combine_le(self, data_repeated): - # override because expected needs to be boolean instead of bool dtype - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], - dtype="boolean", - ) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean") - self.assert_series_equal(result, expected) - def test_searchsorted(self, data_for_sorting, as_series): # override because we only have 2 unique values data_for_sorting = pd.array([True, False], dtype="boolean") diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 9c5b3426246a8..745911871694c 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -270,28 +270,7 @@ def test_fillna_frame(self, data_missing): class TestMethods(BaseSparseTests, base.BaseMethodsTests): - def test_combine_le(self, data_repeated): - # We return a Series[SparseArray].__le__ returns a - # Series[Sparse[bool]] - # rather than Series[bool] - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series( - SparseArray( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], - fill_value=False, - ) - ) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 <= x2) - expected = pd.Series( - SparseArray([a <= val for a in list(orig_data1)], fill_value=False) - ) - self.assert_series_equal(result, expected) + _combine_le_expected_dtype = "Sparse[bool]" def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index a77316cbc0ea6..bdeb11dbb8f19 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -1,3 +1,8 @@ +from datetime import ( + date, + time, + timedelta, +) import pickle import numpy as np @@ -67,7 +72,21 @@ def test_arithmetic_ops(all_arithmetic_functions, other): @pytest.mark.parametrize( - "other", [NA, 1, 1.0, "a", b"a", np.int64(1), np.nan, np.bool_(True)] + "other", + [ + NA, + 1, + 1.0, + "a", + b"a", + np.int64(1), + np.nan, + np.bool_(True), + time(0), + date(1, 2, 3), + timedelta(1), + pd.NaT, + ], ) def test_comparison_ops(comparison_op, other): assert comparison_op(NA, other) is NA From 822e53c534228769a458007f33e5c95cd13d365f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 20 Jan 2023 19:28:50 -0800 Subject: [PATCH 2/3] mypy fixup --- pandas/tests/extension/base/methods.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 84f41718d62f5..54d41fa9d972a 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas._typing import Dtype + from pandas.core.dtypes.common import is_bool_dtype from pandas.core.dtypes.missing import na_value_for_dtype @@ -261,7 +263,7 @@ def test_fillna_length_mismatch(self, data_missing): data_missing.fillna(data_missing.take([1])) # Subclasses can override if we expect e.g Sparse[bool], boolean, pyarrow[bool] - _combine_le_expected_dtype = np.dtype(bool) + _combine_le_expected_dtype: Dtype = np.dtype(bool) def test_combine_le(self, data_repeated): # GH 20825 From 4b60b40cc3b9624b322da4d0e8f2d31b6def419c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 24 Jan 2023 11:15:40 -0800 Subject: [PATCH 3/3] fix on nullable dtypes --- pandas/tests/extension/test_floating.py | 2 +- pandas/tests/extension/test_integer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index f67f7dc56d26f..60c78b46a4832 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -173,7 +173,7 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - pass + _combine_le_expected_dtype = object # TODO: can we make this boolean? class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 788a0bf46afc5..936764c3627d0 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -201,7 +201,7 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): - pass + _combine_le_expected_dtype = object # TODO: can we make this boolean? class TestCasting(base.BaseCastingTests):