-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[REVIEW] Fix dataframe setitem with ndarray
types
#10056
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1123,7 +1123,15 @@ def __setitem__(self, arg, value): | |
for col_name in self._data: | ||
self._data[col_name][mask] = value | ||
else: | ||
if isinstance(value, DataFrame): | ||
if isinstance(value, (cupy.ndarray, np.ndarray)): | ||
_setitem_with_dataframe( | ||
input_df=self, | ||
replace_df=cudf.DataFrame(value), | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any chance we can make use of factory method
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did check this but realized we have the logic that handles
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
||
input_cols=arg, | ||
mask=None, | ||
ignore_index=True, | ||
) | ||
elif isinstance(value, DataFrame): | ||
_setitem_with_dataframe( | ||
input_df=self, | ||
replace_df=value, | ||
|
@@ -6393,13 +6401,15 @@ def _setitem_with_dataframe( | |
replace_df: DataFrame, | ||
input_cols: Any = None, | ||
mask: Optional[cudf.core.column.ColumnBase] = None, | ||
ignore_index: bool = False, | ||
): | ||
""" | ||
This function sets item dataframes relevant columns with replacement df | ||
:param input_df: Dataframe to be modified inplace | ||
:param replace_df: Replacement DataFrame to replace values with | ||
:param input_cols: columns to replace in the input dataframe | ||
:param mask: boolean mask in case of masked replacing | ||
:param ignore_index: Whether to conduct index equality and reindex | ||
""" | ||
|
||
if input_cols is None: | ||
|
@@ -6410,7 +6420,11 @@ def _setitem_with_dataframe( | |
"Number of Input Columns must be same replacement Dataframe" | ||
) | ||
|
||
if len(input_df) != 0 and not input_df.index.equals(replace_df.index): | ||
if ( | ||
not ignore_index | ||
and len(input_df) != 0 | ||
and not input_df.index.equals(replace_df.index) | ||
): | ||
replace_df = replace_df.reindex(input_df.index) | ||
|
||
for col_1, col_2 in zip(input_cols, replace_df.columns): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see the array was being converted to a DataFrame in order to make use of `_setitem_with_dataframe`. Ideally, I wish we could separate the logic of column selection from this helper and directly replace the targeted columns. But that's out of scope for sure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I agree, it would be nice to just convert the array to a column without having to deal with all the DataFrame junk but I don't see an easy way to do that at present :(