From a56cc27a348a514ccca1b64f5c5ae721c088a42e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 10 Jan 2023 07:52:03 -0800 Subject: [PATCH 1/8] add tests, pass through non-object type scalars --- python/cudf/cudf/core/column/string.py | 7 +++++ python/cudf/cudf/tests/test_binops.py | 40 ++++++++++++++++++++------ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c5d5b396ebc..1e9109991df 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5612,6 +5612,13 @@ def _binaryop( return NotImplemented if isinstance(other, (StringColumn, str, cudf.Scalar)): + if isinstance(other, cudf.Scalar) and other.dtype != "O": + if op in {"__eq__", "__lt__", "__le__", "__gt__", "__ge__"}: + result = column.full(len(self), False, dtype="bool") + elif op == "__ne__": + result = column.full(len(self), True, dtype="bool") + result = result.set_mask(self.mask) + return result if op == "__add__": if isinstance(other, cudf.Scalar): other = cast( diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index e5ade1326c9..d8287e3a23b 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -319,15 +319,38 @@ def test_series_compare_nulls(cmpop, dtypes): got = cmpop(lser, rser) utils.assert_eq(expect, got) +def string_series_compare_test_cases(): + cases = [] + pd_sr = pd.Series(["a", "b", None, "d", "e", None], dtype='string') + all_cmpop_cases = [ + (pd_sr, pd_sr), + (pd_sr, 'a'), + ('a', pd_sr), + ] -@pytest.mark.parametrize( - "obj", [pd.Series(["a", "b", None, "d", "e", None], dtype="string"), "a"] -) -@pytest.mark.parametrize("cmpop", _cmpops) -@pytest.mark.parametrize( - "cmp_obj", - [pd.Series(["b", "a", None, "d", "f", None], dtype="string"), "a"], -) + for op in _cmpops: + for case in all_cmpop_cases: + cases.append( + (*case, op) + ) + + eq_neq_cases = ( + (pd_sr, 1), + (1, pd_sr), + (pd_sr, 1.5), + (1.5, pd_sr), + (pd_sr, True), + (True, pd_sr) + ) + for case in eq_neq_cases: + cases += [ + (*case, operator.eq), + (*case, operator.ne) + ] + + return cases + +@pytest.mark.parametrize('obj, cmp_obj, cmpop', string_series_compare_test_cases()) def test_string_series_compare(obj, cmpop, cmp_obj): g_obj = obj @@ -344,7 +367,6 @@ def test_string_series_compare(obj, cmpop, cmp_obj): utils.assert_eq(expected, got) - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("nelem", [1, 2, 100]) @pytest.mark.parametrize("cmpop", _cmpops) From a694b2bff3c2dd105bf9adf2838dc03e37d9289f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 10 Jan 2023 07:53:51 -0800 Subject: [PATCH 2/8] style --- python/cudf/cudf/tests/test_binops.py | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index d8287e3a23b..82a5017ac1b 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. import decimal import operator @@ -319,38 +319,37 @@ def test_series_compare_nulls(cmpop, dtypes): got = cmpop(lser, rser) utils.assert_eq(expect, got) + def string_series_compare_test_cases(): cases = [] - pd_sr = pd.Series(["a", "b", None, "d", "e", None], dtype='string') + pd_sr = pd.Series(["a", "b", None, "d", "e", None], dtype="string") all_cmpop_cases = [ (pd_sr, pd_sr), - (pd_sr, 'a'), - ('a', pd_sr), + (pd_sr, "a"), + ("a", pd_sr), ] for op in _cmpops: for case in all_cmpop_cases: - cases.append( - (*case, op) - ) - + cases.append((*case, op)) + eq_neq_cases = ( (pd_sr, 1), (1, pd_sr), (pd_sr, 1.5), (1.5, pd_sr), (pd_sr, True), - (True, pd_sr) + (True, pd_sr), ) for case in eq_neq_cases: - cases += [ - (*case, operator.eq), - (*case, operator.ne) - ] + cases += [(*case, operator.eq), (*case, operator.ne)] return cases -@pytest.mark.parametrize('obj, cmp_obj, cmpop', string_series_compare_test_cases()) + +@pytest.mark.parametrize( + "obj, cmp_obj, cmpop", string_series_compare_test_cases() +) def test_string_series_compare(obj, cmpop, cmp_obj): g_obj = obj @@ -367,6 +366,7 @@ def test_string_series_compare(obj, cmpop, cmp_obj): utils.assert_eq(expected, got) + @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("nelem", [1, 2, 100]) @pytest.mark.parametrize("cmpop", _cmpops) From 8fb8c11931d42f68d3ebd8cc3c5413912816ef69 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 10 Jan 2023 08:11:49 -0800 Subject: [PATCH 3/8] small update --- python/cudf/cudf/core/column/string.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 1e9109991df..29df5ca5997 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5576,7 +5576,7 @@ def normalize_binop_value( and other.dtype == "object" ): return other - if isinstance(other, str): + if is_scalar(other): return cudf.Scalar(other) return NotImplemented @@ -5614,11 +5614,13 @@ def _binaryop( if isinstance(other, (StringColumn, str, cudf.Scalar)): if isinstance(other, cudf.Scalar) and other.dtype != "O": if op in {"__eq__", "__lt__", "__le__", "__gt__", "__ge__"}: - result = column.full(len(self), False, dtype="bool") + val = False elif op == "__ne__": - result = column.full(len(self), True, dtype="bool") - result = result.set_mask(self.mask) - return result + val = True + return column.full(len(self), val, dtype="bool").set_mask( + self.mask + ) + if op == "__add__": if isinstance(other, cudf.Scalar): other = cast( From 671e7e5109d10ba7250ca31941b3b2a4d00fdebd Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 11 Jan 2023 07:18:21 -0800 Subject: [PATCH 4/8] pass string scalars through datetime binops if possible --- python/cudf/cudf/core/column/datetime.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 56436ac141d..0c546168fe3 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. from __future__ import annotations @@ -261,6 +261,11 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: return cudf.Scalar(None, dtype=other.dtype) return cudf.Scalar(other) + elif isinstance(other, str): + try: + return cudf.Scalar(other, dtype=self.dtype) + except ValueError: + pass return NotImplemented From 8c30b11043e5ef626e2dd78a158c1988c2b8399a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 12 Jan 2023 06:28:40 -0800 Subject: [PATCH 5/8] add tests for passing through strings to datetime binops --- python/cudf/cudf/tests/test_datetime.py | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 5616cea42ba..1211938ff10 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -22,6 +22,15 @@ expect_warning_if, ) +_cmpops = [ + operator.lt, + operator.gt, + operator.le, + operator.ge, + operator.eq, + operator.ne, +] + def data1(): return pd.date_range("20010101", "20020215", freq="400h", name="times") @@ -986,6 +995,23 @@ def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op): ) +@pytest.mark.parametrize("data", ["20110101", "20120101", "20130101"]) +@pytest.mark.parametrize("other_scalars", ["20110101", "20120101", "20130101"]) +@pytest.mark.parametrize("op", _cmpops) +@pytest.mark.parametrize( + "dtype", + ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"], +) +def test_datetime_series_cmpops_with_scalars(data, other_scalars, dtype, op): + gsr = cudf.Series(data=data, dtype=dtype) + psr = gsr.to_pandas() + + expect = op(psr, other_scalars) + got = op(gsr, other_scalars) + + assert_eq(expect, got) + + @pytest.mark.parametrize( "data", [ From 12bdb885bb4989b605fbc193c269fe5229093c08 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 2 Feb 2023 12:16:03 -0800 Subject: [PATCH 6/8] adjust logic --- python/cudf/cudf/core/column/string.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 31ef033e8bd..343a2a48a7b 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5702,13 +5702,22 @@ def _binaryop( if isinstance(other, (StringColumn, str, cudf.Scalar)): if isinstance(other, cudf.Scalar) and other.dtype != "O": - if op in {"__eq__", "__lt__", "__le__", "__gt__", "__ge__"}: + if op in { + "__eq__", + "__lt__", + "__le__", + "__gt__", + "__ge__", + "__ne__", + }: val = False - elif op == "__ne__": - val = True - return column.full(len(self), val, dtype="bool").set_mask( - self.mask - ) + if op == "__ne__": + val = True + return column.full(len(self), val, dtype="bool").set_mask( + self.mask + ) + else: + return NotImplemented if op == "__add__": if isinstance(other, cudf.Scalar): From aa1c6a51a2279330ae1f532b1b8d0723bc8db557 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 6 Feb 2023 11:02:31 -0800 Subject: [PATCH 7/8] Adjust logic --- python/cudf/cudf/core/column/string.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 343a2a48a7b..ce8bc3da08b 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5704,18 +5704,11 @@ def _binaryop( if isinstance(other, cudf.Scalar) and other.dtype != "O": if op in { "__eq__", - "__lt__", - "__le__", - "__gt__", - "__ge__", "__ne__", }: - val = False - if op == "__ne__": - val = True - return column.full(len(self), val, dtype="bool").set_mask( - self.mask - ) + return column.full( + len(self), op == "__ne__", dtype="bool" + ).set_mask(self.mask) else: return NotImplemented From 7172f04a29821df368231e26f8578adccf0309f6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 8 Feb 2023 08:58:46 -0800 Subject: [PATCH 8/8] refactor to fixtures --- python/cudf/cudf/tests/test_binops.py | 91 ++++++++++++++++----------- 1 file changed, 54 insertions(+), 37 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 82a5017ac1b..7d01f89eada 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -320,51 +320,68 @@ def test_series_compare_nulls(cmpop, dtypes): utils.assert_eq(expect, got) -def string_series_compare_test_cases(): - cases = [] - pd_sr = pd.Series(["a", "b", None, "d", "e", None], dtype="string") - all_cmpop_cases = [ - (pd_sr, pd_sr), - (pd_sr, "a"), - ("a", pd_sr), - ] +@pytest.fixture +def str_series_cmp_data(): + return pd.Series(["a", "b", None, "d", "e", None], dtype="string") + + +@pytest.fixture(ids=[op.__name__ for op in _cmpops], params=_cmpops) +def str_series_compare_str_cmpop(request): + return request.param + + +@pytest.fixture(ids=["eq", "ne"], params=[operator.eq, operator.ne]) +def str_series_compare_num_cmpop(request): + return request.param + - for op in _cmpops: - for case in all_cmpop_cases: - cases.append((*case, op)) - - eq_neq_cases = ( - (pd_sr, 1), - (1, pd_sr), - (pd_sr, 1.5), - (1.5, pd_sr), - (pd_sr, True), - (True, pd_sr), +@pytest.fixture(ids=["int", "float", "bool"], params=[1, 1.5, True]) +def cmp_scalar(request): + return request.param + + +def test_str_series_compare_str( + str_series_cmp_data, str_series_compare_str_cmpop +): + expect = str_series_compare_str_cmpop(str_series_cmp_data, "a") + got = str_series_compare_str_cmpop( + Series.from_pandas(str_series_cmp_data), "a" ) - for case in eq_neq_cases: - cases += [(*case, operator.eq), (*case, operator.ne)] - return cases + utils.assert_eq(expect, got.to_pandas(nullable=True)) -@pytest.mark.parametrize( - "obj, cmp_obj, cmpop", string_series_compare_test_cases() -) -def test_string_series_compare(obj, cmpop, cmp_obj): +def test_str_series_compare_str_reflected( + str_series_cmp_data, str_series_compare_str_cmpop +): + expect = str_series_compare_str_cmpop("a", str_series_cmp_data) + got = str_series_compare_str_cmpop( + "a", Series.from_pandas(str_series_cmp_data) + ) - g_obj = obj - if isinstance(g_obj, pd.Series): - g_obj = Series.from_pandas(g_obj) - g_cmp_obj = cmp_obj - if isinstance(g_cmp_obj, pd.Series): - g_cmp_obj = Series.from_pandas(g_cmp_obj) - got = cmpop(g_obj, g_cmp_obj) - expected = cmpop(obj, cmp_obj) + utils.assert_eq(expect, got.to_pandas(nullable=True)) - if isinstance(expected, pd.Series): - expected = cudf.from_pandas(expected) - utils.assert_eq(expected, got) +def test_str_series_compare_num( + str_series_cmp_data, str_series_compare_num_cmpop, cmp_scalar +): + expect = str_series_compare_num_cmpop(str_series_cmp_data, cmp_scalar) + got = str_series_compare_num_cmpop( + Series.from_pandas(str_series_cmp_data), cmp_scalar + ) + + utils.assert_eq(expect, got.to_pandas(nullable=True)) + + +def test_str_series_compare_num_reflected( + str_series_cmp_data, str_series_compare_num_cmpop, cmp_scalar +): + expect = str_series_compare_num_cmpop(cmp_scalar, str_series_cmp_data) + got = str_series_compare_num_cmpop( + cmp_scalar, Series.from_pandas(str_series_cmp_data) + ) + + utils.assert_eq(expect, got.to_pandas(nullable=True)) @pytest.mark.parametrize("obj_class", ["Series", "Index"])