Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise error in reindex when index is not unique #14429

Merged
merged 1 commit into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2606,6 +2606,10 @@ def _reindex(

df = self
if index is not None:
if not df._index.is_unique:
raise ValueError(
"cannot reindex on an axis with duplicate labels"
)
index = cudf.core.index.as_index(
index, name=getattr(index, "name", self._index.name)
)
Expand Down
12 changes: 12 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10723,3 +10723,15 @@ def test_dataframe_series_dot():
expected = gser @ [12, 13]

assert_eq(expected, actual)


def test_dataframe_duplicate_index_reindex():
    """Check that cudf and pandas raise matching exceptions when
    ``DataFrame.reindex`` is called on a frame with duplicate index labels.
    """
    gpu_frame = cudf.DataFrame(
        {"a": [0, 1, 2, 3]},
        index=[0, 0, 1, 1],
    )
    cpu_frame = gpu_frame.to_pandas()

    # Each side gets its own (args, kwargs) pair, as in a direct call.
    gpu_call = ([10, 11, 12, 13], {})
    cpu_call = ([10, 11, 12, 13], {})

    assert_exceptions_equal(
        gpu_frame.reindex,
        cpu_frame.reindex,
        lfunc_args_and_kwargs=gpu_call,
        rfunc_args_and_kwargs=cpu_call,
    )
12 changes: 12 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2638,3 +2638,15 @@ def test_series_setitem_mixed_bool_dtype():
s = cudf.Series([True, False, True])
with pytest.raises(TypeError):
s[0] = 10


def test_series_duplicate_index_reindex():
    """Check that cudf and pandas raise matching exceptions when
    ``Series.reindex`` is called on a series with duplicate index labels.
    """
    gpu_series = cudf.Series([0, 1, 2, 3], index=[0, 0, 1, 1])
    cpu_series = gpu_series.to_pandas()

    # Each side gets its own (args, kwargs) pair, as in a direct call.
    gpu_call = ([10, 11, 12, 13], {})
    cpu_call = ([10, 11, 12, 13], {})

    assert_exceptions_equal(
        gpu_series.reindex,
        cpu_series.reindex,
        lfunc_args_and_kwargs=gpu_call,
        rfunc_args_and_kwargs=cpu_call,
    )
13 changes: 4 additions & 9 deletions python/dask_cudf/dask_cudf/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,28 +437,23 @@ def union_categoricals_cudf(
)


@_dask_cudf_nvtx_annotate
def safe_hash(frame):
    """Return ``frame.hash_values()`` wrapped in a ``cudf.Series`` that
    reuses ``frame.index`` as its index.
    """
    hashed = frame.hash_values()
    return cudf.Series(hashed, index=frame.index)


# Handler registered on hash_object_dispatch for cudf.DataFrame and
# cudf.Series inputs.
# NOTE(review): this span comes from a rendered diff ("4 additions & 9
# deletions") whose +/- markers and indentation were lost, so removed and
# added lines appear together. Presumably the two `safe_hash(...)` returns
# are the removed lines and the `frame = frame.reset_index()` /
# `return frame.hash_values()` pair is their replacement — confirm against
# the actual backends.py before relying on this reading.
@hash_object_dispatch.register((cudf.DataFrame, cudf.Series))
@_dask_cudf_nvtx_annotate
def hash_object_cudf(frame, index=True):
if index:
return safe_hash(frame.reset_index())
return safe_hash(frame)
# reset_index() is applied to the frame before hash_values() is called on it.
frame = frame.reset_index()
return frame.hash_values()


# Handler registered on hash_object_dispatch for cudf.BaseIndex inputs.
# The `index` parameter is accepted but not used in the visible body.
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost; each `return safe_hash(...)` line is presumably the
# removed version of the `.hash_values()` return that follows it — confirm
# against the actual backends.py.
@hash_object_dispatch.register(cudf.BaseIndex)
@_dask_cudf_nvtx_annotate
def hash_object_cudf_index(ind, index=None):

# A MultiIndex is converted with to_frame(index=False) and that frame is hashed.
if isinstance(ind, cudf.MultiIndex):
return safe_hash(ind.to_frame(index=False))
return ind.to_frame(index=False).hash_values()

# Any other index is converted to a column, wrapped in a cudf.Series, and hashed.
col = cudf.core.column.as_column(ind)
return safe_hash(cudf.Series(col))
return cudf.Series(col).hash_values()


@group_split_dispatch.register((cudf.Series, cudf.DataFrame))
Expand Down