Skip to content

Commit

Permalink
add duplicate column name validation
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Jan 25, 2022
1 parent a552afb commit b1dfe93
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
15 changes: 10 additions & 5 deletions python/cudf/cudf/core/column_accessor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -523,14 +523,19 @@ def rename_column(x):
raise IndexError(
f"Too many levels: Index has only 1 level, not {level+1}"
)

if isinstance(mapper, Mapping):
new_names = (
new_col_names = [
mapper.get(col_name, col_name) for col_name in self.keys()
)
]
else:
new_names = (mapper(col_name) for col_name in self.keys())
new_col_names = [mapper(col_name) for col_name in self.keys()]

if len(new_col_names) != len(set(new_col_names)):
raise ValueError("Duplicate column names are not allowed")

ca = ColumnAccessor(
dict(zip(new_names, self.values())),
dict(zip(new_col_names, self.values())),
level_names=self.level_names,
multiindex=self.multiindex,
)
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9083,3 +9083,11 @@ def test_dataframe_assign_cp_np_array():
gdf[[f"f_{i}" for i in range(n)]] = cp_ndarray

assert_eq(pdf, gdf)


def test_dataframe_rename_duplicate_column():
    """Renaming a column to an already-used name raises ValueError."""
    frame = cudf.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
    expected_message = "Duplicate column names are not allowed"

    # Mapping "a" -> "b" would leave two columns both named "b".
    with pytest.raises(ValueError, match=expected_message):
        frame.rename(columns={"a": "b"}, inplace=True)

0 comments on commit b1dfe93

Please sign in to comment.