From a3ae461eb7763d2d43d16954c56921fee3714e70 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 4 Nov 2022 17:48:48 +0000 Subject: [PATCH 1/4] Fix off-by-one in copy-range Only when copying a zero-length range can we afford to return the target column. Closes #12073. --- python/cudf/cudf/_lib/copying.pyx | 2 +- python/cudf/cudf/tests/test_setitem.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index d9a7a5b8754..f5c910ca77d 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -144,7 +144,7 @@ def copy_range(Column input_column, target_column from target_begin to target_end """ - if abs(target_end - target_begin) <= 1: + if abs(target_end - target_begin) < 1: return target_column if target_begin < 0: diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 13b342e6c3b..bbf6f14d99f 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -108,6 +108,12 @@ def test_series_set_item(psr, arg): assert_eq(psr, gsr) +def test_series_setitem_singleton_range(): + sr = cudf.Series([1, 2, 3], dtype=np.int64) + sr.iloc[:1] = np.asarray([7], dtype=np.int64) + assert_eq(sr, cudf.Series([7, 2, 3], dtype=np.int64)) + + @pytest.mark.parametrize( "df", [ From ca2eba38c1ad2b48bf9614c176aabb28f9478d66 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 4 Nov 2022 17:54:36 +0000 Subject: [PATCH 2/4] Simplify copy_range dispatch Precondition is that range indices have been sanitised. --- python/cudf/cudf/_lib/copying.pyx | 41 ++++++++++++++++--------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index f5c910ca77d..1de91e6a3e9 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -132,36 +132,37 @@ def _copy_range(Column input_column, return Column.from_unique_ptr(move(c_result)) -def copy_range(Column input_column, +def copy_range(Column source_column, Column target_column, - size_type input_begin, - size_type input_end, + size_type source_begin, + size_type source_end, size_type target_begin, size_type target_end, bool inplace): """ - Copy input_column from input_begin to input_end to - target_column from target_begin to target_end - """ - - if abs(target_end - target_begin) < 1: - return target_column + Copy a contiguous range from a source to a target column - if target_begin < 0: - target_begin = target_begin + target_column.size - - if target_end < 0: - target_end = target_end + target_column.size + Notes + ----- + Expects the source and target ranges to have been sanitised to be + in-range for the source and target column respectively. For + example via ``slice.indices``. + """ - if target_begin > target_end: + assert ( + source_end - source_begin == target_end - target_begin, + "Source and target ranges must be same length" + ) + if target_end >= target_begin and inplace: + # FIXME: Are we allowed to do this when inplace=False? return target_column - if inplace is True: - _copy_range_in_place(input_column, target_column, - input_begin, input_end, target_begin) + if inplace: + _copy_range_in_place(source_column, target_column, + source_begin, source_end, target_begin) else: - return _copy_range(input_column, target_column, - input_begin, input_end, target_begin) + return _copy_range(source_column, target_column, + source_begin, source_end, target_begin) def gather( From e9639c66b643a7241a554729eadf47851dac73d9 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 8 Nov 2022 10:10:23 +0000 Subject: [PATCH 3/4] Also test against pandas --- python/cudf/cudf/tests/test_setitem.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index bbf6f14d99f..e612eda31f6 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -110,8 +110,12 @@ def test_series_set_item(psr, arg): def test_series_setitem_singleton_range(): sr = cudf.Series([1, 2, 3], dtype=np.int64) - sr.iloc[:1] = np.asarray([7], dtype=np.int64) + psr = sr.to_pandas() + value = np.asarray([7], dtype=np.int64) + sr.iloc[:1] = value + psr.iloc[:1] = value assert_eq(sr, cudf.Series([7, 2, 3], dtype=np.int64)) + assert_eq(sr, psr) @pytest.mark.parametrize( From 9e9c20a76de22e1726c285055ab7c3bc552c657c Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 11 Nov 2022 18:27:45 +0000 Subject: [PATCH 4/4] Check the dtype as well --- python/cudf/cudf/tests/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index e612eda31f6..95848c29cc2 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -115,7 +115,7 @@ def test_series_setitem_singleton_range(): sr.iloc[:1] = value psr.iloc[:1] = value assert_eq(sr, cudf.Series([7, 2, 3], dtype=np.int64)) - assert_eq(sr, psr) + assert_eq(sr, psr, check_dtype=True) @pytest.mark.parametrize(