Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce deep copies in Index ops #16054

Merged
merged 2 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,7 @@ def difference(self, other, sort=None):
res_name = _get_result_name(self.name, other.name)

if is_mixed_with_object_dtype(self, other) or len(other) == 0:
difference = self.copy().unique()
difference = self.unique()
difference.name = res_name
if sort is True:
return difference.sort_values()
Expand Down Expand Up @@ -1746,7 +1746,7 @@ def rename(self, name, inplace=False):
self.name = name
return None
else:
out = self.copy(deep=True)
out = self.copy(deep=False)
out.name = name
return out

Expand Down Expand Up @@ -2070,7 +2070,7 @@ def dropna(self, how="any"):
raise ValueError(f"{how=} must be 'any' or 'all'")
try:
if not self.hasnans:
return self.copy()
return self.copy(deep=False)
except NotImplementedError:
pass
# This is to be consistent with IndexedFrame.dropna to handle nans
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def memory_usage(self, deep: bool = False) -> int:

def unique(self) -> Self:
    """Return the unique values of this index as a new object.

    A RangeIndex never contains duplicates, so no deduplication work is
    required; the result is simply a copy of this index.

    Returns
    -------
    Self
        A copy of this index (never ``self`` itself).
    """
    # RangeIndex always has unique values, so copying is sufficient.
    # A new object is returned instead of ``self`` so that callers which
    # mutate the result (for example by assigning ``name``) do not
    # silently modify this index.
    return self.copy()

@_cudf_nvtx_annotate
def __mul__(self, other):
Expand Down Expand Up @@ -3206,7 +3206,8 @@ def _get_nearest_indexer(
)
right_indexer = _get_indexer_basic(
index=index,
positions=positions.copy(deep=True),
# positions is no longer used after this call, so a copy is unnecessary
positions=positions,
method="backfill",
target_col=target_col,
tolerance=tolerance,
Expand Down
25 changes: 23 additions & 2 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,10 @@ def test_index_rename_inplace():
pds = pd.Index([1, 2, 3], name="asdf")
gds = Index(pds)

# inplace=False should yield a deep copy
# inplace=False should yield a shallow copy
gds_renamed_deep = gds.rename("new_name", inplace=False)

assert gds_renamed_deep._values.data_ptr != gds._values.data_ptr
assert gds_renamed_deep._values.data_ptr == gds._values.data_ptr

# inplace=True returns none
expected_ptr = gds._values.data_ptr
Expand Down Expand Up @@ -3214,6 +3214,27 @@ def test_rangeindex_dropna():
assert_eq(result, expected)


def test_rangeindex_unique_shallow_copy():
    """RangeIndex.unique() must return a new object, not the receiver."""
    # pandas reference behaviour: the result is a distinct object.
    pandas_index = pd.RangeIndex(1)
    pandas_unique = pandas_index.unique()
    assert pandas_unique is not pandas_index

    # cudf must match: a distinct object that still compares equal.
    cudf_index = cudf.RangeIndex(1)
    cudf_unique = cudf_index.unique()
    assert cudf_unique is not cudf_index
    assert_eq(cudf_unique, cudf_index)


def test_rename_shallow_copy():
    """Index.rename() should share its underlying data with the original."""
    # pandas: both indexes expose the very same numpy array object.
    pandas_index = pd.Index([1])
    pandas_renamed = pandas_index.rename("a")
    assert (
        pandas_index.to_numpy(copy=False)
        is pandas_renamed.to_numpy(copy=False)
    )

    # cudf: the renamed index holds the identical column object.
    cudf_index = cudf.Index([1])
    cudf_renamed = cudf_index.rename("a")
    assert cudf_index._column is cudf_renamed._column


@pytest.mark.parametrize("data", [range(2), [10, 11, 12]])
def test_index_contains_hashable(data):
gidx = cudf.Index(data)
Expand Down
Loading