Skip to content

Commit

Permalink
Fix dataframe setitem with ndarray types (#10056)
Browse files Browse the repository at this point in the history
Fixes: #9928 

This PR fixes assignment of 2D `cupy`/`numpy` arrays to multiple columns through `DataFrame.__setitem__`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Michael Wang (https://github.com/isVoid)

URL: #10056
  • Loading branch information
galipremsagar authored Jan 14, 2022
1 parent 12adb8a commit 8c8d6ef
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
18 changes: 16 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,15 @@ def __setitem__(self, arg, value):
for col_name in self._data:
self._data[col_name][mask] = value
else:
if isinstance(value, DataFrame):
if isinstance(value, (cupy.ndarray, np.ndarray)):
_setitem_with_dataframe(
input_df=self,
replace_df=cudf.DataFrame(value),
input_cols=arg,
mask=None,
ignore_index=True,
)
elif isinstance(value, DataFrame):
_setitem_with_dataframe(
input_df=self,
replace_df=value,
Expand Down Expand Up @@ -6401,13 +6409,15 @@ def _setitem_with_dataframe(
replace_df: DataFrame,
input_cols: Any = None,
mask: Optional[cudf.core.column.ColumnBase] = None,
ignore_index: bool = False,
):
"""
This function sets item dataframes relevant columns with replacement df
:param input_df: Dataframe to be modified inplace
:param replace_df: Replacement DataFrame to replace values with
:param input_cols: columns to replace in the input dataframe
:param mask: boolean mask in case of masked replacing
:param ignore_index: Whether to conduct index equality and reindex
"""

if input_cols is None:
Expand All @@ -6418,7 +6428,11 @@ def _setitem_with_dataframe(
"Number of Input Columns must be same replacement Dataframe"
)

if len(input_df) != 0 and not input_df.index.equals(replace_df.index):
if (
not ignore_index
and len(input_df) != 0
and not input_df.index.equals(replace_df.index)
):
replace_df = replace_df.reindex(input_df.index)

for col_1, col_2 in zip(input_cols, replace_df.columns):
Expand Down
11 changes: 11 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9030,3 +9030,14 @@ def test_dataframe_add_suffix():
expected = pdf.add_suffix("_item")

assert_eq(got, expected)


def test_dataframe_assign_cp_np_array():
    """Assigning a 2D cupy or numpy array to several columns matches pandas.

    The same random ``(m, n)`` values are written into ``n`` existing
    columns of a pandas frame (as a numpy array) and of two cudf frames
    (once as a cupy array, once as a numpy array). Both cudf results must
    equal the pandas result, so both ndarray types accepted by
    ``DataFrame.__setitem__`` are exercised.
    """
    m, n = 5, 3
    columns = [f"f_{i}" for i in range(n)]

    cp_ndarray = cupy.random.randn(m, n)
    # Host copy of the same values, so every frame receives identical data.
    np_ndarray = cupy.asnumpy(cp_ndarray)

    pdf = pd.DataFrame({name: range(m) for name in columns})
    gdf_from_cp = cudf.DataFrame({name: range(m) for name in columns})
    gdf_from_np = cudf.DataFrame({name: range(m) for name in columns})

    pdf[columns] = np_ndarray
    gdf_from_cp[columns] = cp_ndarray
    gdf_from_np[columns] = np_ndarray

    assert_eq(pdf, gdf_from_cp)
    assert_eq(pdf, gdf_from_np)

0 comments on commit 8c8d6ef

Please sign in to comment.