From 34ba8464dceffc8a2c86bc0f53fe0676e2a413c9 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 9 Dec 2021 11:41:36 -0800 Subject: [PATCH 1/3] Adds assert_column_memory_equal utility --- python/cudf/cudf/testing/_utils.py | 19 +++++++++ python/cudf/cudf/tests/test_index.py | 60 +++------------------------- 2 files changed, 25 insertions(+), 54 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index cc5aec36853..aada654ecb2 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -310,3 +310,22 @@ def does_not_raise(): def xfail_param(param, **kwargs): return pytest.param(param, marks=pytest.mark.xfail(**kwargs)) + + +def assert_column_memory_eq( + lhs: cudf.core.column.ColumnBase, rhs: cudf.core.column.ColumnBase +): + assert lhs.base_data_ptr == rhs.base_data_ptr + assert lhs.base_mask_ptr == rhs.base_mask_ptr + for lhs_child, rhs_child in zip(lhs.base_children, rhs.base_children): + assert_column_memory_eq(lhs_child, rhs_child) + + +def assert_column_memory_ne( + lhs: cudf.core.column.ColumnBase, rhs: cudf.core.column.ColumnBase +): + try: + assert_column_memory_eq(lhs, rhs) + except AssertionError: + return + raise AssertionError("lhs and rhs holds the same memory.") diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index ab211616a02..91c0ed393dd 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -28,6 +28,8 @@ SIGNED_INTEGER_TYPES, SIGNED_TYPES, UNSIGNED_TYPES, + assert_column_memory_eq, + assert_column_memory_ne, assert_eq, assert_exceptions_equal, ) @@ -391,62 +393,12 @@ def test_index_copy_category(name, dtype, deep=True): ], ) def test_index_copy_deep(idx, deep): - """Test if deep copy creates a new instance for device data. - The general criterion is to compare `Buffer.ptr` between two data objects. 
- Specifically for: - - CategoricalIndex, this applies to both `.codes` and `.categories` - - StringIndex, to every element in `._base_children` - - Others, to `.base_data` - No test is defined for RangeIndex. - """ + """Test if deep copy creates a new instance for device data.""" idx_copy = idx.copy(deep=deep) - same_ref = not deep - if isinstance(idx, cudf.CategoricalIndex): - assert ( - idx._values.codes.base_data.ptr - == idx_copy._values.codes.base_data.ptr - ) == same_ref - if isinstance( - idx._values.categories, cudf.core.column.string.StringColumn - ): - children = idx._values.categories._base_children - copy_children = idx_copy._values.categories._base_children - assert all( - [ - ( - children[i].base_data.ptr - == copy_children[i].base_data.ptr - ) - == same_ref - for i in range(len(children)) - ] - ) - elif isinstance( - idx._values.categories, cudf.core.column.numerical.NumericalColumn - ): - assert ( - idx._values.categories.base_data.ptr - == idx_copy._values.categories.base_data.ptr - ) == same_ref - elif isinstance(idx, cudf.StringIndex): - children = idx._values._base_children - copy_children = idx_copy._values._base_children - assert all( - [ - ( - ( - children[i].base_data.ptr - == copy_children[i].base_data.ptr - ) - == same_ref - ) - for i in range(len(children)) - ] - ) + if not deep: + assert_column_memory_eq(idx._values, idx_copy._values) else: - assert ( - idx._values.base_data.ptr == idx_copy._values.base_data.ptr - ) == same_ref + assert_column_memory_ne(idx._values, idx_copy._values) @pytest.mark.parametrize("idx", [[1, None, 3, None, 5]]) From 10421ee191403a6cb7d5c6d2e4767052906afbc0 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 20 Jan 2022 15:53:28 -0800 Subject: [PATCH 2/3] Check size, number of children and add docstring. 
--- python/cudf/cudf/testing/_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index aada654ecb2..1eb89dd48cb 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -315,8 +315,16 @@ def xfail_param(param, **kwargs): def assert_column_memory_eq( lhs: cudf.core.column.ColumnBase, rhs: cudf.core.column.ColumnBase ): + """Assert the memory location and size of `lhs` and `rhs` are equivalent. + + Both data pointer and mask pointer are checked. Also recursively check for + children to the same constraints. Also fails check if the number of children + mismatches at any level. + """ assert lhs.base_data_ptr == rhs.base_data_ptr assert lhs.base_mask_ptr == rhs.base_mask_ptr + assert lhs.size == rhs.size + assert len(lhs.base_children) == len(rhs.base_children) for lhs_child, rhs_child in zip(lhs.base_children, rhs.base_children): assert_column_memory_eq(lhs_child, rhs_child) From 85004078bc60d664614a9581cecc5ac3ed0280af Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 7 Mar 2022 14:59:32 -0800 Subject: [PATCH 3/3] Add test for memory test utils --- python/cudf/cudf/testing/_utils.py | 2 + python/cudf/cudf/tests/test_testing.py | 62 +++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index ab6e3466b25..f6b5e0f3ccc 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -344,6 +344,8 @@ def assert_column_memory_eq( """ assert lhs.base_data_ptr == rhs.base_data_ptr assert lhs.base_mask_ptr == rhs.base_mask_ptr + assert lhs.base_size == rhs.base_size + assert lhs.offset == rhs.offset assert lhs.size == rhs.size assert len(lhs.base_children) == len(rhs.base_children) for lhs_child, rhs_child in zip(lhs.base_children, rhs.base_children): diff --git a/python/cudf/cudf/tests/test_testing.py 
b/python/cudf/cudf/tests/test_testing.py index 0b27c562d75..4dc4d86d94c 100644 --- a/python/cudf/cudf/tests/test_testing.py +++ b/python/cudf/cudf/tests/test_testing.py @@ -1,7 +1,8 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -11,10 +12,28 @@ assert_index_equal, assert_series_equal, ) -from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq +from cudf.testing._utils import ( + NUMERIC_TYPES, + OTHER_TYPES, + assert_column_memory_eq, + assert_column_memory_ne, + assert_eq, +) from cudf.testing.testing import assert_column_equal +@pytest.fixture( + params=[ + pa.array([*range(10)]), + pa.array(["hello", "world", "rapids", "AI"]), + pa.array([[1, 2, 3], [4, 5], [6], [], [7]]), + pa.array([{"f0": "hello", "f1": 42}, {"f0": "world", "f1": 3}]), + ] +) +def arrow_arrays(request): + return request.param + + @pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]]) @pytest.mark.parametrize("exact", ["equiv", True, False]) @pytest.mark.parametrize("check_names", [True, False]) @@ -369,3 +388,42 @@ def test_basic_scalar_equality(left, right): def test_basic_scalar_inequality(left, right): with pytest.raises(AssertionError, match=r".*not (almost )?equal.*"): assert_eq(left, right) + + +def test_assert_column_memory_basic(arrow_arrays): + left = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + right = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + + with pytest.raises(AssertionError): + assert_column_memory_eq(left, right) + assert_column_memory_ne(left, right) + + +def test_assert_column_memory_slice(arrow_arrays): + col = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + left = col[0:1] + right = col[1:2] + + with pytest.raises(AssertionError): + assert_column_memory_eq(left, right) + assert_column_memory_ne(left, right) + + with pytest.raises(AssertionError): + assert_column_memory_eq(left, 
col) + assert_column_memory_ne(left, col) + + with pytest.raises(AssertionError): + assert_column_memory_eq(right, col) + assert_column_memory_ne(right, col) + + +def test_assert_column_memory_basic_same(arrow_arrays): + data = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + buf = cudf.core.buffer.Buffer(data=data.base_data, owner=data) + + left = cudf.core.column.build_column(buf, dtype=np.int32) + right = cudf.core.column.build_column(buf, dtype=np.int32) + + assert_column_memory_eq(left, right) + with pytest.raises(AssertionError): + assert_column_memory_ne(left, right)