From 9014e8bc2059b91ed420f83a83702eeb9e2ebc72 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 18 May 2023 11:18:57 -0700 Subject: [PATCH 1/4] use cupy for dataframe copy tests --- python/cudf/cudf/tests/test_dataframe_copy.py | 28 +++---------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 85e994bd733..986462f1a7e 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -1,10 +1,10 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. from copy import copy, deepcopy +import cupy as cp import numpy as np import pandas as pd import pytest -from numba import cuda from cudf.core.dataframe import DataFrame from cudf.testing._utils import ALL_TYPES, assert_eq @@ -131,27 +131,6 @@ def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type): assert not copy_df.to_string().split() == df.to_string().split() -@cuda.jit -def group_mean(data, segments, output): - i = cuda.grid(1) - if i < segments.size: - s = segments[i] - e = segments[i + 1] if (i + 1) < segments.size else data.size - # mean calculation - carry = 0.0 - n = e - s - for j in range(s, e): - carry += data[j] - output[i] = carry / n - - -@cuda.jit -def add_one(data): - i = cuda.grid(1) - if i == 1: - data[i] = data[i] + 1.0 - - def test_kernel_deep_copy(): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] @@ -160,7 +139,7 @@ def test_kernel_deep_copy(): cdf = gdf.copy(deep=True) sr = gdf["b"] - add_one[1, len(sr)](sr._column.data_array_view(mode="write")) + cp.asarray(sr._column)[1] = 42 assert not gdf.to_string().split() == cdf.to_string().split() @@ -171,7 +150,8 @@ def test_kernel_shallow_copy(): gdf = DataFrame.from_pandas(pdf) cdf = gdf.copy(deep=False) sr = gdf["a"] - add_one[1, len(sr)](sr.to_cupy()) + cp.asarray(sr._column)[1] = 42 + assert_eq(gdf, cdf) From 83bc7359aff39c682f69a37db5df8618ca5bc50b Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 18 May 2023 14:26:05 -0500 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_dataframe_copy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 986462f1a7e..92cef43ed4a 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -131,7 +131,7 @@ def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type): assert not copy_df.to_string().split() == df.to_string().split() -def test_kernel_deep_copy(): +def test_deep_copy_write_in_place(): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) @@ -139,8 +139,10 @@ def test_kernel_deep_copy(): cdf = gdf.copy(deep=True) sr = gdf["b"] + # Write a value in-place on the deep copy. + # This should only affect the copy and not the original. cp.asarray(sr._column)[1] = 42 - assert not gdf.to_string().split() == cdf.to_string().split() + assert_neq(gdf, cdf) def test_kernel_shallow_copy(): @@ -150,6 +152,8 @@ def test_kernel_shallow_copy(): gdf = DataFrame.from_pandas(pdf) cdf = gdf.copy(deep=False) sr = gdf["a"] + # Write a value in-place on the shallow copy. + # This should change the copy and original. cp.asarray(sr._column)[1] = 42 assert_eq(gdf, cdf) From cbd00dce152d92b20886f32e8a90bd1ab1fefb48 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 18 May 2023 14:38:54 -0500 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_dataframe_copy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 92cef43ed4a..4080c06eb8b 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -142,16 +142,18 @@ def test_deep_copy_write_in_place(): # Write a value in-place on the deep copy. # This should only affect the copy and not the original. cp.asarray(sr._column)[1] = 42 + assert_neq(gdf, cdf) -def test_kernel_shallow_copy(): +def test_shallow_copy_write_in_place(): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) gdf = DataFrame.from_pandas(pdf) cdf = gdf.copy(deep=False) sr = gdf["a"] + # Write a value in-place on the shallow copy. # This should change the copy and original. cp.asarray(sr._column)[1] = 42 From 52bba09107d626611cb42b4cca0aa2a88ddab7c8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 18 May 2023 12:40:56 -0700 Subject: [PATCH 4/4] add missing import --- python/cudf/cudf/tests/test_dataframe_copy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 4080c06eb8b..fec52d82ab1 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -7,7 +7,7 @@ import pytest from cudf.core.dataframe import DataFrame -from cudf.testing._utils import ALL_TYPES, assert_eq +from cudf.testing._utils import ALL_TYPES, assert_eq, assert_neq """ DataFrame copy expectations